Skip to content

Instantly share code, notes, and snippets.

View adhadse's full-sized avatar
🐧

Anurag Dhadse adhadse

🐧
View GitHub Profile
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_recall_curve
# First, get the precisions, recalls and thresholds for your classifier.
# `classifier`, `X_train` and `y_train` are not defined in this snippet and must
# already exist. method='decision_function' with cv=3 requests cross-validated
# decision scores for each training sample instead of predicted class labels.
y_scores = cross_val_predict(classifier, X_train, y_train, cv=3, method='decision_function')
# NOTE(review): per the scikit-learn documentation, `thresholds` has one element
# fewer than `precisions` / `recalls` -- keep that in mind when indexing them together.
precisions, recalls, thresholds = precision_recall_curve(y_train, y_scores)
def plot_precision_vs_recall(precisions, recalls, metric_name=None, metric_perc=None):
plt.figure(figsize=(15, 10))
plt.plot(recalls, precisions, 'b-', linewidth=2)
plt.xlabel("Recall", fontsize=15)
plt.ylabel("Precision", fontsize=15)
plt.axis([0, 1, 0, 1])
if metric_name=='precision':
# tradedoff recall & threshold at percentage precision we want.
recall_atperc_precision = recalls[np.argmax(precisions >= metric_perc)]
threshold_atperc_precision = thresholds[np.argmax(precisions >= metric_perc)]
In[1] y_scores = clf.decision_function([instance_from_test_set])
no_threshold = 0
y_some_prediction = (y_scores > no_threshold)
y_some_prediction
Out[1] array([True])
In[2] # Threshold returned from plotting func
y_some_prediction = (y_scores > threshold)
y_some_prediction
Out[2] array([False])
def determine_outlier_thresholds_iqr(dataframe, col_name, th1=0.25, th3=0.75, whisker=1.5):
    """Return the (lower, upper) outlier limits of a column using the IQR rule.

    Args:
        dataframe: Object indexable by ``col_name`` whose column exposes
            ``quantile`` (e.g. a pandas DataFrame).
        col_name: Name of the column to analyse.
        th1: Quantile used as the lower "quartile" (default 0.25).
        th3: Quantile used as the upper "quartile" (default 0.75).
        whisker: Multiple of the inter-quantile range added above ``th3`` and
            subtracted below ``th1`` (default 1.5, the previous fixed value).

    Returns:
        A ``(lower_limit, upper_limit)`` tuple.
    """
    column = dataframe[col_name]  # look the column up once
    quartile1 = column.quantile(th1)
    quartile3 = column.quantile(th3)
    iqr = quartile3 - quartile1
    lower_limit = quartile1 - whisker * iqr
    upper_limit = quartile3 + whisker * iqr
    return lower_limit, upper_limit
def check_outliers_iqr(dataframe, col_name, th1=0.25, th3=0.75):
    """Report whether a column contains any value outside its IQR limits.

    Args:
        dataframe: Object indexable by ``col_name`` (e.g. a pandas DataFrame).
        col_name: Name of the column to check.
        th1: Lower quantile forwarded to ``determine_outlier_thresholds_iqr``.
        th3: Upper quantile forwarded to ``determine_outlier_thresholds_iqr``.

    Returns:
        ``True`` if at least one value lies strictly above the upper limit or
        strictly below the lower limit, otherwise ``False``.
    """
    lower_limit, upper_limit = determine_outlier_thresholds_iqr(dataframe, col_name, th1, th3)
    column = dataframe[col_name]
    is_outlier = (column > upper_limit) | (column < lower_limit)
    # Test the mask itself. Filtering the frame and calling .any(axis=None)
    # checks the truthiness of the *values* in the outlier rows, so an outlier
    # row made of zeros / False would be reported as "no outliers".
    return bool(is_outlier.any())
def replace_with_thresholds_iqr(dataframe,cols, th1=0.05, th3=0.95, replace=False):
from tabulate import tabulate
data = []
def determine_outlier_thresholds_std(dataframe, col_name, n_std=3):
    """Return the (lower, upper) outlier boundaries of a column as mean +/- n_std * std.

    Args:
        dataframe: Object indexable by ``col_name`` whose column exposes
            ``mean`` and ``std`` (e.g. a pandas DataFrame).
        col_name: Name of the column to analyse.
        n_std: Number of standard deviations on each side of the mean
            (default 3, the previous fixed value).

    Returns:
        A ``(lower_boundary, upper_boundary)`` tuple.
    """
    column = dataframe[col_name]
    # Compute each statistic once instead of once per boundary.
    center = column.mean()
    spread = n_std * column.std()
    lower_boundary = center - spread
    upper_boundary = center + spread
    return lower_boundary, upper_boundary
def check_outliers_std(dataframe, col_name):
    """Report whether a column contains any value outside its mean +/- std boundaries.

    Args:
        dataframe: Object indexable by ``col_name`` (e.g. a pandas DataFrame).
        col_name: Name of the column to check.

    Returns:
        ``True`` if at least one value lies strictly above the upper boundary
        or strictly below the lower boundary returned by
        ``determine_outlier_thresholds_std``, otherwise ``False``.
    """
    lower_boundary, upper_boundary = determine_outlier_thresholds_std(dataframe, col_name)
    column = dataframe[col_name]
    is_outlier = (column > upper_boundary) | (column < lower_boundary)
    # Test the mask itself. Filtering the frame and calling .any(axis=None)
    # checks the truthiness of the *values* in the outlier rows, so an outlier
    # row made of zeros / False would be reported as "no outliers".
    return bool(is_outlier.any())
def replace_with_thresholds_std(dataframe, cols, replace=False):
from tabulate import tabulate
data = []
import tensorflow as tf
from tensorflow import keras
# Build a feed-forward classifier layer by layer with Sequential.add().
model = keras.models.Sequential()
# Flatten each 28x28 input into a 1-D vector of 784 values.
model.add(keras.layers.Flatten(input_shape=[28, 28]))
# Two fully connected hidden layers (300 and 100 units) with ReLU activation.
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
# Output layer: 10 units with softmax (presumably one per target class -- confirm).
model.add(keras.layers.Dense(10, activation="softmax"))
# Build the classifier by passing the full list of layers to the
# Sequential constructor.
model = keras.models.Sequential([
# Flatten each 28x28 input into a 1-D vector of 784 values.
keras.layers.Flatten(input_shape=[28, 28]),
# Two fully connected hidden layers (300 and 100 units) with ReLU activation.
keras.layers.Dense(300, activation="relu"),
keras.layers.Dense(100, activation="relu"),
# Output layer: 10 units with softmax (presumably one per target class -- confirm).
keras.layers.Dense(10, activation="softmax")
])