Skip to content

Instantly share code, notes, and snippets.

# https://www.kaggle.com/suicaokhoailang/lstm-attention-baseline-0-652-lb
# Layer subclass; only the opening of its constructor is visible in this excerpt.
# NOTE(review): indentation has been stripped from this file and __init__ appears to be
# cut off after its first two assignments -- step_dim, the regularizer/constraint
# arguments, bias and **kwargs are not used in the visible lines. Confirm against the
# complete source (see the URL above) before relying on this definition.
class Attention(Layer):
def __init__(self, step_dim,
W_regularizer=None, b_regularizer=None,
W_constraint=None, b_constraint=None,
bias=True, **kwargs):
# Set the supports_masking flag on the instance.
self.supports_masking = True
# Look up the 'glorot_uniform' initializer by name and keep it for later use.
self.init = initializers.get('glorot_uniform')
# Matthews correlation coefficient calculation used inside Keras model
# NOTE(review): only the signature and docstring are visible in this excerpt; the
# statements that perform the calculation are missing. threshold_search in this file
# passes float64 arrays for both arguments and evaluates the result with K.eval.
def matthews_correlation(y_true, y_pred):
"""
Calculate Matthews Correlation Coefficient.

y_true holds the reference labels and y_pred the predictions to score
(presumably backend tensors or arrays of 0/1 values -- TODO confirm).

References
----------
.. [1] https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
.. [2] https://www.kaggle.com/tarunpaparaju/vsb-competition-attention-bilstm-with-features/notebook?scriptVersionId=10690570
"""
# Start of the model definition: an Input followed by one bidirectional CuDNNLSTM layer.
# NOTE(review): the function is truncated in this excerpt -- no further layers, no model
# construction and no return statement are visible, and indentation has been stripped.
def model_lstm(input_shape):
# The shape was explained above and must have this order; only input_shape[1] and
# input_shape[2] are used (index 0 is presumably the sample count -- TODO confirm).
inp = Input(shape=(input_shape[1], input_shape[2],))
# This is the LSTM layer.
# Bidirectional implies that the 160 chunks are processed in both directions, 0 to 159 and 159 to 0.
# Although it may appear that only the 0-to-159 direction matters, I have tested with and without, and the later worked best.
# NOTE(review): unclear which variant "the later" refers to; the code below uses Bidirectional.
# 128 and 64 are the numbers of cells used; too many can overfit and too few can underfit
# (only the 128-cell layer is visible in this excerpt).
x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(inp)
# x = Activation('relu')(x)
# x = Dropout(0.25)(x)
def threshold_search(y_true, y_proba):
    """Find the decision threshold that maximises the Matthews correlation.

    Every threshold in 0.00, 0.01, ..., 0.99 is tried: probabilities strictly
    greater than the threshold count as positive predictions, and the
    resulting labels are scored with ``matthews_correlation`` (evaluated
    through ``K.eval``).

    Args:
        y_true: Array of reference labels; must provide ``astype``.
        y_proba: Array of predicted probabilities; must support ``>`` against
            a float and yield an array providing ``astype``.

    Returns:
        dict with keys ``'threshold'`` (best threshold found) and
        ``'matthews_correlation'`` (its score). Both stay at ``0`` when no
        threshold scores strictly above zero.
    """
    # The label cast does not depend on the threshold, so do it once instead
    # of repeating it on each of the 100 iterations.
    y_true_f64 = y_true.astype(np.float64)

    best_threshold = 0
    best_score = 0
    for threshold in tqdm([i * 0.01 for i in range(100)]):
        y_pred_f64 = (y_proba > threshold).astype(np.float64)
        score = K.eval(matthews_correlation(y_true_f64, y_pred_f64))
        # Strict comparison: on ties the lowest threshold wins.
        if score > best_score:
            best_threshold = threshold
            best_score = score
    search_result = {'threshold': best_threshold, 'matthews_correlation': best_score}
    return search_result
# Collect test-set predictions from every fold's saved weights.
# preds_test ends up with one list per fold; within each list every prediction
# returned by the model appears three times in a row (presumably one copy per
# output row that shares the prediction -- TODO confirm against the submission code).
preds_test = []
for i in range(N_SPLITS):
    # Weights are expected in files named weights_<fold index>.h5.
    model.load_weights('weights_{}.h5'.format(i))
    pred = model.predict(X_test_input, batch_size=300, verbose=1)
    # Repeat each prediction 3 times. The comprehension uses a throwaway
    # counter so the fold index `i` is no longer shadowed by the inner loop.
    pred_3 = [pred_scalar for pred_scalar in pred for _ in range(3)]
    preds_test.append(pred_3)
import flask

# `import flask` binds only the module name, so the application class has to be
# reached as flask.Flask; the bare name `Flask` is not defined by that import.
app = flask.Flask(__name__)


@app.route('/')
def hello_world():
    """Return the plain-text greeting served at the root URL."""
    return 'Hello World!'
# Route handler registered for '/index8050'.
# NOTE(review): this definition is truncated in the excerpt -- the triple-quoted
# html_template literal opened below is never closed in the visible lines, nothing is
# written to or closed on `f`, and indentation has been stripped. Confirm against the
# complete source; the file handle should also be managed with a `with` block.
@app.route('/index8050')
def index8050():
# open index8050.html in write mode (creates the file, or truncates an existing one)
f = open('index8050.html', 'w')
# the HTML markup that will go into that file (the original comment called it GFG.html)
html_template = """<html>
<head>
<title></title>
</head>
<body>
<h2>VSB Power Line Fault Detection:Classification</h2>
# Bar chart built from sorted_x: the first element of each entry goes on the
# x axis (number of words) and the second on the y axis (frequency).
plt.figure(figsize=(16, 8))
plt.title("Number of words in a question and it's frequency for challenge set")
# x and y are passed by keyword: recent seaborn releases no longer accept them
# positionally, while the keyword form works on old and new versions alike.
sns.barplot(
    x=[entry[0] for entry in sorted_x],
    y=[entry[1] for entry in sorted_x],
)
plt.xlabel("Number of words")
plt.ylabel("Frequency")
plt.show()
def Answer_graph(type_):
    """Plot answer-option frequencies for the Train, Test and Dev sets side by side.

    For each of the three splits, the module-level object named
    ``<split>_multiple_main_<type_>`` is looked up, its ``'AnswerKey'`` column
    is counted with ``value_counts()``, and the counts are drawn as a bar
    chart in one cell of a 1x3 subplot grid.

    Args:
        type_: Suffix selecting the dataset variant; it is part of the looked-up
            variable name and of the printed and plotted titles.

    Raises:
        KeyError: If no module-level name ``<split>_multiple_main_<type_>``
            exists for one of the splits.
    """
    print(type_+" dataset Analysis ...............")
    plt.figure(figsize=(16, 4.5))
    for position, env in enumerate(["Train", "Test", "Dev"], start=1):
        # Resolve the dataset by name from module globals rather than eval-ing
        # a constructed source string: same lookup, no arbitrary code execution.
        frame = globals()[env + "_multiple_main_" + type_]
        answers = frame['AnswerKey'].value_counts().to_dict()
        plt.subplot(1, 3, position)
        plt.title(env+"_Answers Options vs Frequency("+type_+")")
        # x and y are passed by keyword; recent seaborn releases no longer
        # accept them positionally.
        sns.barplot(x=list(answers.keys()), y=list(answers.values()))
        plt.xlabel("Answer Options")
        plt.ylabel("Frequency")
# Test SchoolGrade
def plot_subplot_bar(data, type_):
    """Draw one distribution plot per split (Train, Test, Dev) in a 1x3 grid.

    Args:
        data: Python expression suffix appended to each split name and
            evaluated, e.g. something that continues ``Train...``. It is
            executed with ``eval``, so only trusted, hard-coded strings may be
            passed.
        type_: Label used in the printed header and in each subplot title.
    """
    print(type_+" dataset Analysis ...............")
    plt.figure(figsize=(16, 4.5))
    # The loop names `i` and `v` are kept as-is because the eval-ed expression
    # can see this function's local variables.
    for i, v in enumerate(["Train", "Test", "Dev"], start=1):
        plt.subplot(1, 3, i)
        plt.title(v+"_schoolGrade vs No_of_times_used("+type_+")")
        # NOTE(review): eval executes the string `v + data` as code; never feed
        # it external input. sns.distplot is also deprecated in newer seaborn
        # releases -- plan a migration to histplot/displot.
        sns.distplot(eval(v+data))
        # Fixed ticks 0..10 on the x axis.
        plt.xticks(list(range(11)))
        plt.ylabel("No_of_times_used")