kusal1990’s gists

kusal1990 / Attention_layer.py

Created June 2, 2022 18:34

	# https://www.kaggle.com/suicaokhoailang/lstm-attention-baseline-0-652-lb

	class Attention(Layer):
	def __init__(self, step_dim,
	W_regularizer=None, b_regularizer=None,
	W_constraint=None, b_constraint=None,
	bias=True, **kwargs):
	self.supports_masking = True
	self.init = initializers.get('glorot_uniform')

kusal1990 / Matthews_Correlation_Coefficient.py

Created June 2, 2022 18:35

	# Matthews correlation coefficient calculation used inside Keras model
	def matthews_correlation(y_true, y_pred):
	"""
	Calculate Matthews Correlation Coefficient.

	References
	----------
	.. [1] https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
	.. [2] https://www.kaggle.com/tarunpaparaju/vsb-competition-attention-bilstm-with-features/notebook?scriptVersionId=10690570
	"""

kusal1990 / lstm_model.py

Created June 2, 2022 18:36

	def model_lstm(input_shape):
	# The shape was explained above, must have this order
	inp = Input(shape=(input_shape[1], input_shape[2],))
	# This is the LSTM layer
	# Bidirectional implies that the 160 chunks are processed in both directions, 0 to 159 and 159 to 0
	# although it appears that only the 0 to 159 direction matters, I have tested with and without, and the latter worked best
	# 128 and 64 are the number of cells used, too many can overfit and too few can underfit
	x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(inp)
	# x = Activation('relu')(x)
	# x = Dropout(0.25)(x)

kusal1990 / Threshold_selection.py

Created June 2, 2022 18:38

	def threshold_search(y_true, y_proba):
	    """Scan thresholds 0.00, 0.01, ..., 0.99 and keep the best-scoring one.

	    For every candidate threshold, ``y_proba`` is binarised with a strict
	    ``>`` comparison and scored against ``y_true`` using
	    ``matthews_correlation`` (evaluated through ``K.eval``).

	    Returns a dict with keys ``'threshold'`` and ``'matthews_correlation'``.
	    Only a strictly higher score replaces the current best, so the lowest
	    threshold wins ties, and both values stay at 0 if no score exceeds 0.
	    """
	    top_threshold = 0
	    top_score = 0
	    candidates = [step * 0.01 for step in range(100)]
	    for cutoff in tqdm(candidates):
	        truth = y_true.astype(np.float64)
	        labels = (y_proba > cutoff).astype(np.float64)
	        mcc = K.eval(matthews_correlation(truth, labels))
	        if mcc > top_score:
	            top_threshold, top_score = cutoff, mcc
	    return {'threshold': top_threshold, 'matthews_correlation': top_score}

kusal1990 / Prediction_test_dataset.py

Created June 2, 2022 18:40

	# Collect one flattened prediction list per saved fold checkpoint.
	preds_test = []
	for i in range(N_SPLITS):
	    # Restore the weights saved for fold ``i`` before predicting.
	    model.load_weights('weights_{}.h5'.format(i))
	    pred = model.predict(X_test_input, batch_size=300, verbose=1)
	    pred_3 = []
	    for pred_scalar in pred:
	        # Each prediction is emitted three times in a row. The original used
	        # an inner ``for i in range(3)`` here, which overwrote the fold index.
	        # NOTE(review): presumably one copy per row of a 3-row group in the
	        # submission file — confirm against the caller.
	        pred_3.extend([pred_scalar] * 3)
	    preds_test.append(pred_3)

kusal1990 / app_file.py

Created June 2, 2022 18:41

kusal1990 / url.py

Created June 2, 2022 18:43

	# to open/create a new html file in the write mode
	f = open('index8050.html', 'w')

	# the HTML code intended for the file opened above (index8050.html)
	html_template = """<html>
	<head>
	<title></title>
	</head>
	<body>
	<h2>VSB Power Line Fault Detection:Classification</h2>

kusal1990 / PLOT FOR QUESTION.py

Last active June 24, 2022 06:28

	# Bar chart built from the (first, second) pairs in ``sorted_x``: the first
	# item of each pair goes on the x axis, the second on the y axis.
	plt.figure(figsize=(16, 8))
	plt.title("Number of words in a question and it's frequency for challenge set")
	word_counts = [pair[0] for pair in sorted_x]
	frequencies = [pair[1] for pair in sorted_x]
	# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
	# positionally, and older releases accept the keyword form as well.
	sns.barplot(x=word_counts, y=frequencies)
	plt.xlabel("Number of words")
	plt.ylabel("Frequency")
	plt.show()

kusal1990 / plot for answer.py

Created June 24, 2022 06:39

	def Answer_graph(type_):
	    """Plot answer-option frequencies for the Train, Test and Dev splits.

	    For each split, the object named ``<split>_multiple_main_<type_>`` is
	    looked up by evaluating its name, and the value counts of its
	    ``'AnswerKey'`` entry are drawn as a bar chart in a 1x3 subplot grid.

	    ``type_`` is the name suffix used for the lookup; it also appears in
	    the printed banner and the subplot titles.
	    """
	    print(type_+" dataset Analysis ...............")
	    plt.figure(figsize=(16, 4.5))
	    for i, env in enumerate(["Train", "Test", "Dev"], start=1):
	        # NOTE(review): eval() on a string built from ``type_`` runs arbitrary
	        # code; only call this with trusted, hard-coded suffixes.
	        Answers = eval(env+"_multiple_main_"+type_+"[\'AnswerKey\'].value_counts().to_dict()")
	        plt.subplot(1, 3, i)
	        plt.title(env+"_Answers Options vs Frequency("+type_+")")
	        # x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
	        # positionally.
	        sns.barplot(x=list(Answers.keys()), y=list(Answers.values()))
	        plt.xlabel("Answer Options")
	        plt.ylabel("Frequency")

kusal1990 / SCHOOL_GRADE.py

Created June 24, 2022 06:49

	# Test SchoolGrade
	def plot_subplot_bar(data, type_):
	    """Draw one distribution subplot per split (Train, Test, Dev).

	    The values plotted for each split come from evaluating the expression
	    formed by the split name followed by ``data``. ``type_`` is used only
	    in the printed banner and in the subplot titles.
	    """
	    banner = type_ + " dataset Analysis ..............."
	    print(banner)
	    plt.figure(figsize=(16, 4.5))
	    tick_positions = list(range(11))
	    # Loop names ``i`` and ``v`` are kept as-is because the evaluated
	    # expression can see this function's local variables.
	    for i, v in enumerate(("Train", "Test", "Dev"), start=1):
	        plt.subplot(1, 3, i)
	        heading = v + "_schoolGrade vs No_of_times_used(" + type_ + ")"
	        plt.title(heading)
	        values = eval(v + data)
	        sns.distplot(values)
	        plt.xticks(tick_positions)
	        plt.ylabel("No_of_times_used")

KUSAL BERA kusal1990