This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sklearn.model_selection import cross_val_predict | |
| from sklearn.metrics import precision_recall_curve | |
# First you need to get the precisions, recalls, thresholds for your classifier.
# cross_val_predict is called with method='decision_function', so y_scores
# holds decision-function scores (not predicted labels), using cv=3 folds.
y_scores = cross_val_predict(classifier, X_train, y_train, cv=3, method='decision_function')
# Unpack the three arrays returned for the true labels and the scores above.
# NOTE(review): per the scikit-learn docs, `thresholds` is one element shorter
# than `precisions`/`recalls` - keep that in mind when indexing them together.
precisions, recalls, thresholds = precision_recall_curve(y_train, y_scores)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_precision_vs_recall(precisions, recalls, metric_name=None, metric_perc=None): | |
| plt.figure(figsize=(15, 10)) | |
| plt.plot(recalls, precisions, 'b-', linewidth=2) | |
| plt.xlabel("Recall", fontsize=15) | |
| plt.ylabel("Precision", fontsize=15) | |
| plt.axis([0, 1, 0, 1]) | |
| if metric_name=='precision': | |
| # trade-off: recall & threshold at the precision percentage we want. | |
| recall_atperc_precision = recalls[np.argmax(precisions >= metric_perc)] | |
| threshold_atperc_precision = thresholds[np.argmax(precisions >= metric_perc)] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| In[1] y_scores = clf.decision_function([instance_from_test_set]) | |
| no_threshold = 0 | |
| y_some_prediction = (y_scores > no_threshold) | |
| y_some_prediction | |
| Out[1] array([True]) | |
| In[2] # Threshold returned from plotting func | |
| y_some_prediction = (y_scores > threshold) | |
| y_some_prediction | |
| Out[2] array([False]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def determine_outlier_thresholds_iqr(dataframe, col_name, th1=0.25, th3=0.75, iqr_multiplier=1.5):
    """Return the ``(lower_limit, upper_limit)`` outlier fences for a column.

    The fences sit ``iqr_multiplier`` inter-quantile ranges below the lower
    quantile and above the upper quantile.

    Args:
        dataframe: pandas DataFrame that contains ``col_name``.
        col_name: Label of the column to analyse.
        th1: Lower quantile, passed to ``Series.quantile`` (default 0.25).
        th3: Upper quantile, passed to ``Series.quantile`` (default 0.75).
        iqr_multiplier: Width of the fences in units of the inter-quantile
            range. The default of 1.5 reproduces the previously hard-coded
            behaviour.

    Returns:
        Tuple ``(lower_limit, upper_limit)``.
    """
    # Look the column up once instead of once per quantile.
    column = dataframe[col_name]
    quartile1 = column.quantile(th1)
    quartile3 = column.quantile(th3)
    margin = iqr_multiplier * (quartile3 - quartile1)
    return quartile1 - margin, quartile3 + margin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_outliers_iqr(dataframe, col_name, th1=0.25, th3=0.75):
    """Report whether ``col_name`` has any value outside its IQR fences.

    Args:
        dataframe: pandas DataFrame that contains ``col_name``.
        col_name: Label of the column to check.
        th1: Lower quantile forwarded to ``determine_outlier_thresholds_iqr``.
        th3: Upper quantile forwarded to ``determine_outlier_thresholds_iqr``.

    Returns:
        ``True`` if at least one value lies strictly below the lower limit or
        strictly above the upper limit, otherwise ``False``.
    """
    lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name, th1, th3)
    column = dataframe[col_name]
    outside = (column > upper_limit) | (column < lower_limit)
    # Test the mask itself. Reducing the *filtered rows* with .any(axis=None)
    # asks "is any cell truthy?", which misses outlier rows whose values are
    # 0/False (e.g. a lone 0 among identical non-zero values).
    return bool(outside.any())
| def replace_with_thresholds_iqr(dataframe,cols, th1=0.05, th3=0.95, replace=False): | |
| from tabulate import tabulate | |
| data = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def determine_outlier_thresholds_std(dataframe, col_name, n_std=3):
    """Return ``(lower_boundary, upper_boundary)`` around the column mean.

    The boundaries sit ``n_std`` standard deviations below and above the
    mean, using the column's ``mean()`` and ``std()`` as computed by pandas.

    Args:
        dataframe: pandas DataFrame that contains ``col_name``.
        col_name: Label of the column to analyse.
        n_std: Number of standard deviations for the band. The default of 3
            reproduces the previously hard-coded behaviour.

    Returns:
        Tuple ``(lower_boundary, upper_boundary)``.
    """
    # Compute each statistic once; the original evaluated both twice.
    column = dataframe[col_name]
    center = column.mean()
    margin = n_std * column.std()
    return center - margin, center + margin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_outliers_std(dataframe, col_name):
    """Report whether ``col_name`` has any value outside its mean/std band.

    Args:
        dataframe: pandas DataFrame that contains ``col_name``.
        col_name: Label of the column to check.

    Returns:
        ``True`` if at least one value lies strictly below the lower boundary
        or strictly above the upper boundary returned by
        ``determine_outlier_thresholds_std``, otherwise ``False``.
    """
    lower_boundary, upper_boundary = determine_outlier_thresholds_std(dataframe, col_name)
    column = dataframe[col_name]
    outside = (column > upper_boundary) | (column < lower_boundary)
    # Test the mask itself. Reducing the *filtered rows* with .any(axis=None)
    # asks "is any cell truthy?", which misses outlier rows whose values are
    # 0/False.
    return bool(outside.any())
| def replace_with_thresholds_std(dataframe, cols, replace=False): | |
| from tabulate import tabulate | |
| data = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import tensorflow as tf | |
| from tensorflow import keras |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the network incrementally: start from an empty Sequential model and
# append one layer per add() call, in the order the data flows through them.
model = keras.models.Sequential()
# Flatten layer declared with input_shape=[28, 28].
model.add(keras.layers.Flatten(input_shape=[28, 28]))
# Two Dense layers (300, then 100) with ReLU activation.
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
# Final Dense(10) layer with softmax activation.
model.add(keras.layers.Dense(10, activation="softmax"))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the network in one expression by passing the list of layers to the
# Sequential constructor.
model = keras.models.Sequential([
    # Flatten layer declared with input_shape=[28, 28].
    keras.layers.Flatten(input_shape=[28, 28]),
    # Two Dense layers (300, then 100) with ReLU activation.
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    # Final Dense(10) layer with softmax activation.
    keras.layers.Dense(10, activation="softmax")
])