import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression

def create_logistic_regressions(X_train, y_train, figsize=(10, 10)):
    # Fit a logistic regression and plot one bar per feature coefficient.
    logreg = LogisticRegression(solver='lbfgs')
    logreg.fit(X_train, y_train)
    coefficients = logreg.coef_
    intercept = logreg.intercept_
    df_logreg = pd.DataFrame({'Feature': X_train.columns, 'Coef': coefficients[0]})
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x="Coef", y="Feature", data=df_logreg, ax=ax)
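A minimal usage sketch, assuming X_train is a pandas DataFrame of numeric features and y_train the matching binary labels (placeholder names, not defined in this snippet):

# Hypothetical call: draws a horizontal bar chart of the learned coefficients.
create_logistic_regressions(X_train, y_train, figsize=(8, 6))
plt.show()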
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import pydotplus
from IPython.display import Image, display

def create_and_visualize_tree(X_train, y_train, max_depth=3):
    # Fit a shallow decision tree and render it as a PNG via graphviz.
    decision_tree = DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=1, random_state=1)
    decision_tree = decision_tree.fit(X_train, y_train)
    tree_str = export_graphviz(decision_tree, feature_names=X_train.columns,
                               filled=True, out_file=None)
    graph = pydotplus.graph_from_dot_data(tree_str)
    graph.write_png('dt.png')
    display(Image('dt.png'))
    return decision_tree
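The returned estimator can be reused downstream, for example (assuming X_train, y_train and X_test exist, and that the graphviz binaries are installed for the PNG rendering):

dt = create_and_visualize_tree(X_train, y_train, max_depth=4)
preds = dt.predict(X_test)  # hypothetical held-out set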
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm_list, target_names, title_list, cmap=None,
                          normalize=True, float_format_str='{:,.2f}'):
    # Plot one confusion matrix per model; row sums give the actual class counts.
    plt.figure(figsize=(10, 5))
    print('{}_count={:d}\n{}_count={:d}'.format(
        target_names[0], cm_list[0][0].sum(),
        target_names[1], cm_list[0][1].sum()))
    stats_list = []
    for i in range(len(cm_list)):
        model_name = title_list[i]
        cm = cm_list[i]
        actual_phishy = cm[0]
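One way to build the cm_list input, assuming two fitted classifiers and a held-out X_test/y_test; model_a, model_b and the titles below are placeholders, not names from the snippets above:

from sklearn.metrics import confusion_matrix

# model_a and model_b stand for any two fitted classifiers from the earlier steps.
cm_list = [confusion_matrix(y_test, m.predict(X_test)) for m in (model_a, model_b)]
plot_confusion_matrix(cm_list, target_names=['phishy', 'legit'],
                      title_list=['Model A', 'Model B'])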
from sklearn import metrics
import matplotlib.pyplot as plt

def take_roc_curve(X_test, model):
    # Score the positive class and compute ROC / precision-recall points.
    # Note: y_test is taken from the enclosing (notebook) scope.
    y_preds = model.predict_proba(X_test)
    preds = y_preds[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test, preds)
    precision, recall, _ = metrics.precision_recall_curve(y_test, preds)
    auc_score = metrics.auc(fpr, tpr)
    plt.figure(figsize=(10, 5))
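A sketch of calling it with any fitted classifier that implements predict_proba, reusing the imports from the earlier snippets and a y_test defined in the notebook scope as noted above:

clf = LogisticRegression(solver='lbfgs').fit(X_train, y_train)  # any probabilistic classifier
take_roc_curve(X_test, clf)
plt.show()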
import pandas as pd

def generate_logs_from_classifiers(classifiers):
    # Fit each classifier and collect accuracy / log loss into one DataFrame.
    # Note: X_train, y_train and X_test are taken from the enclosing (notebook) scope.
    log_cols = ["Classifier", "Accuracy", "Log Loss"]
    log = pd.DataFrame(columns=log_cols)
    for clf in classifiers:
        name = clf.__class__.__name__
        print('Processing {} classifier'.format(name))
        clf.fit(X_train, y_train)
        train_predictions = clf.predict(X_test)
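It can be driven with any list of scikit-learn estimators; an illustrative selection (not necessarily the one used in the original notebook), assuming the full function returns the log DataFrame:

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

log = generate_logs_from_classifiers([
    LogisticRegression(solver='lbfgs'),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(n_estimators=100),
])
print(log)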
from tensorflow.keras.preprocessing.text import Tokenizer

sentences = [
    'I eat chicken',
    'I do not eat fish',
    'Did you eat fish?'
]

# Keep the 100 most frequent words and map anything unseen to "<OOV>".
tokenizer = Tokenizer(num_words=100, oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
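After fitting, the learned vocabulary can be inspected; index 1 is reserved for the OOV token and the remaining words are ranked by frequency:

print(tokenizer.word_index)
# e.g. {'<OOV>': 1, 'eat': 2, 'i': 3, 'fish': 4, ...}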
from tensorflow.keras.preprocessing.sequence import pad_sequences

word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentences)
# Pad with zeros (by default at the front) so every sequence has length 5.
padded = pad_sequences(sequences, maxlen=5)

print("\nThe Word Index = ", word_index)
print("\nThe Sequences = ", sequences)
print("\nThe Padded Sequences:")
print(padded)
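pad_sequences pre-pads by default; post-padding and truncation can be requested explicitly:

padded_post = pad_sequences(sequences, maxlen=5, padding='post', truncating='post')
print(padded_post)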
import tensorflow as tf

# Assumed values, chosen to match the tokenizer snippets above; embedding_dim is arbitrary.
vocab_size = 100     # num_words passed to the Tokenizer
embedding_dim = 16
max_length = 5       # maxlen used in pad_sequences

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
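A minimal training sketch, assuming the padded sequences from above and a hypothetical array of binary labels (training_labels is not defined in the original snippets):

import numpy as np

training_labels = np.array([0, 1, 1])  # placeholder labels, one per sentence
model.fit(padded, training_labels, epochs=10, verbose=0)
model.summary()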
import pandas as pd

def fixed_width_cut(df, feature, labels=['Low', 'Medium', 'High']):
    # Equal-width binning: pd.cut splits the value range into len(labels) intervals.
    feature_slice, retbins = pd.cut(df[feature], len(labels), retbins=True, labels=labels)
    retbins = ['%.2f' % elem for elem in retbins]
    return feature_slice, retbins

def quartile_cut(df, feature, labels=['Low', 'Medium', 'High']):
    # Equal-frequency binning: pd.qcut puts roughly the same number of rows in each bin.
    feature_slice, retbins = pd.qcut(df[feature], q=len(labels), retbins=True, labels=labels)
    retbins = ['%.2f' % elem for elem in retbins]
    return feature_slice, retbins
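For example, with a hypothetical df DataFrame holding a numeric url_length column (the column name is illustrative):

df['url_length_band'], bin_edges = quartile_cut(df, 'url_length')
print(bin_edges)  # bin edges formatted to two decimals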