PranjalDureja0002’s gists

PranjalDureja0002 / data.py

Created May 7, 2021 11:21

data

	res_dim = 1024

	if __name__ == "__main__":
	"""loading the data,
	reading the file annotations,
	appending the tabular coordinates to formulate a dataframe
	"""
	df_org = pd.DataFrame()
	directory = '/content/drive/MyDrive/data_cs2'
	final_col_directory = '/content/drive/MyDrive/cs2_col'

PranjalDureja0002 / xml_parse.py

Created May 7, 2021 11:18

xml_parse

	res_dim = 1024

	if __name__ == "__main__":
	"""loading the data,
	reading the file annotations,
	appending the tabular coordinates to formulate a dataframe
	"""
	df_org = pd.DataFrame()
	directory = '/content/drive/MyDrive/data_cs2'
	final_col_directory = '/content/drive/MyDrive/cs2_col'

PranjalDureja0002 / data.py

Created May 7, 2021 11:13

data

	<annotation verified="yes">
	<folder>MARMOT_ANNOTATION</folder>
	<filename>10.1.1.1.2006_3.bmp</filename>
	<path>/home/monika/Desktop/MARMOT_ANNOTATION/10.1.1.1.2006_3.bmp</path>
	<source>
	<database>Unknown</database>
	</source>
	<size>
	<width>793</width>
	<height>1123</height>

PranjalDureja0002 / final.py

Created March 15, 2021 10:13

final

	def final_fun_1(X):
	""" function takes raw data as input,preprocessing is done,
	feature engineering is performed and predictions made on the
	best model already trained"""

	d_beneficiary = pd.read_csv('health_cs_data/' + X[0])
	d_inpatient = pd.read_csv('health_cs_data/' + X[1])
	d_outpatient = pd.read_csv('health_cs_data/' + X[2])
	d_labels = pd.read_csv('health_cs_data/' + X[3])

PranjalDureja0002 / model.py

Created March 15, 2021 10:10

model

PranjalDureja0002 / final.py

Created March 15, 2021 10:10

final

	def final_fun_1(X):
	""" function takes raw data as input,preprocessing is done,
	feature engineering is performed and predictions made on the
	best model already trained"""

	d_beneficiary = pd.read_csv('health_cs_data/' + X[0])
	d_inpatient = pd.read_csv('health_cs_data/' + X[1])
	d_outpatient = pd.read_csv('health_cs_data/' + X[2])
	d_labels = pd.read_csv('health_cs_data/' + X[3])

PranjalDureja0002 / model.py

Created March 15, 2021 10:08

model

	def find_best_threshold(threshold, fpr, tpr):
	    """Return the threshold at which ``tpr * (1 - fpr)`` is largest.

	    Args:
	        threshold: Sequence of candidate thresholds, indexed in step with
	            ``fpr`` and ``tpr``.
	        fpr: False-positive rate for each candidate threshold.
	        tpr: True-positive rate for each candidate threshold.

	    Returns:
	        The element of ``threshold`` at the position where
	        ``tpr * (1 - fpr)`` is maximal (first position on ties).

	    Side effects:
	        Prints the maximal score and the chosen threshold rounded to
	        three decimals.
	    """
	    # Coerce to arrays so plain lists work with the element-wise arithmetic.
	    fpr = np.asarray(fpr)
	    tpr = np.asarray(tpr)
	    # Compute the score once; the product must be an explicit multiplication
	    # (``tpr(1-fpr)`` would try to call the array).
	    score = tpr * (1 - fpr)
	    t = threshold[np.argmax(score)]
	    print("the maximum value of tpr(1-fpr)", np.max(score), "for threshold", np.round(t,3))
	    return t

	def predict_with_best_t(proba, threshold):
	predictions = []
	for i in proba:
	if i>=threshold:
	predictions.append(1)

PranjalDureja0002 / model.py

Created March 15, 2021 10:07

model

	+---------+-----------------+----------------+----------------+---------------+
	\| Model \| Train AUC Score \| Test AUC Score \| Train F1 Score \| Test F1 Score \|
	+---------+-----------------+----------------+----------------+---------------+
	\| XgBoost \| 0.99938 \| 0.99855 \| 0.9998 \| 0.990791 \|
	+---------+-----------------+----------------+----------------+---------------+
	+---------------+-----------------+----------------+----------------+---------------+
	\| Model \| Train AUC Score \| Test AUC Score \| Train F1 Score \| Test F1 Score \|
	+---------------+-----------------+----------------+----------------+---------------+
	\| Decision_Tree \| 0.9967 \| 0.9909 \| 0.99314 \| 0.9771 \|
	+---------------+-----------------+----------------+----------------+---------------+
	+---------------------+-----------------+----------------+----------------+---------------+
	\| Model \| Train AUC Score \| Test AUC Score \| Train F1 Score \| Test F1 Score \|

PranjalDureja0002 / model.py

Created March 15, 2021 10:06

model

	# Divide the 80% train set into two equal halves, d1 and d2 (50-50),
	# stratified on y_train with a fixed random_state.
	d1, d2, y1, y2 = train_test_split(
	    X_train,
	    y_train,
	    stratify=y_train,
	    test_size=0.5,
	    random_state=15,
	)
	# Renumber every piece from 0, discarding the index carried over from the split.
	d1, d2, y1, y2 = [part.reset_index(drop=True) for part in (d1, d2, y1, y2)]

	def generating_samples(d1, y1):
	"""From this d1,sampling with replacement is done
	"""

PranjalDureja0002 / model.py

Created March 15, 2021 10:05

model

	+--------------------------------+-------------+---------------+
	\| Custom_Stacking_Implementation \| Base Models \| Test F1 Score \|
	+--------------------------------+-------------+---------------+
	\| \| 50 \| 0.980001 \|
	\| \| 100 \| 0.981479 \|
	\| \| 150 \| 0.982725 \|
	+--------------------------------+-------------+---------------+

	+--------------------------------+-------------+---------------+
	\| Custom_Stacking_Implementation \| Base Models \| Test F1 Score \|
	+--------------------------------+-------------+---------------+
	\| \| 50 \| 0.980001 \|
	\| \| 100 \| 0.981479 \|
	\| \| 150 \| 0.982725 \|
	+--------------------------------+-------------+---------------+