Skip to content

Instantly share code, notes, and snippets.

res_dim = 1024
if __name__ == "__main__":
"""loading the data,
reading the file annotations,
appending the tabular coordinates to formulate a dataframe
"""
df_org = pd.DataFrame()
directory = '/content/drive/MyDrive/data_cs2'
final_col_directory = '/content/drive/MyDrive/cs2_col'
res_dim = 1024
if __name__ == "__main__":
"""loading the data,
reading the file annotations,
appending the tabular coordinates to formulate a dataframe
"""
df_org = pd.DataFrame()
directory = '/content/drive/MyDrive/data_cs2'
final_col_directory = '/content/drive/MyDrive/cs2_col'
<annotation verified="yes">
<folder>MARMOT_ANNOTATION</folder>
<filename>10.1.1.1.2006_3.bmp</filename>
<path>/home/monika/Desktop/MARMOT_ANNOTATION/10.1.1.1.2006_3.bmp</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>793</width>
<height>1123</height>
def final_fun_1(X):
""" function takes raw data as input,preprocessing is done,
feature engineering is performed and predictions made on the
best model already trained"""
d_beneficiary = pd.read_csv('health_cs_data/' + X[0])
d_inpatient = pd.read_csv('health_cs_data/' + X[1])
d_outpatient = pd.read_csv('health_cs_data/' + X[2])
d_labels = pd.read_csv('health_cs_data/' + X[3])
providerID y_predicted
0 PRV57070 0
1 PRV57070 1
2 PRV57070 0
3 PRV57070 0
4 PRV57070 0
5 PRV57070 1
6 PRV57070 0
7 PRV57070 1
8 PRV57070 0
def final_fun_1(X):
""" function takes raw data as input,preprocessing is done,
feature engineering is performed and predictions made on the
best model already trained"""
d_beneficiary = pd.read_csv('health_cs_data/' + X[0])
d_inpatient = pd.read_csv('health_cs_data/' + X[1])
d_outpatient = pd.read_csv('health_cs_data/' + X[2])
d_labels = pd.read_csv('health_cs_data/' + X[3])
def find_best_threshold(threshold, fpr, tpr):
    """Pick the candidate threshold that maximises ``tpr * (1 - fpr)``.

    Args:
        threshold: Candidate thresholds, aligned element-wise with ``fpr``
            and ``tpr``.
        fpr: False-positive rate for each candidate threshold.
        tpr: True-positive rate for each candidate threshold.

    Returns:
        The element of ``threshold`` at the position where
        ``tpr * (1 - fpr)`` is largest (the first such position on ties).
    """
    # Convert once so plain Python sequences work as well as NumPy arrays,
    # and so the score vector is computed a single time.
    scores = np.asarray(tpr) * (1 - np.asarray(fpr))
    best_idx = np.argmax(scores)
    t = threshold[best_idx]
    print("the maximum value of tpr*(1-fpr)", scores[best_idx], "for threshold", np.round(t, 3))
    return t
def predict_with_best_t(proba, threshold):
predictions = []
for i in proba:
if i>=threshold:
predictions.append(1)
+---------+-----------------+----------------+----------------+---------------+
| Model | Train AUC Score | Test AUC Score | Train F1 Score | Test F1 Score |
+---------+-----------------+----------------+----------------+---------------+
| XgBoost | 0.99938 | 0.99855 | 0.9998 | 0.990791 |
+---------+-----------------+----------------+----------------+---------------+
+---------------+-----------------+----------------+----------------+---------------+
| Model | Train AUC Score | Test AUC Score | Train F1 Score | Test F1 Score |
+---------------+-----------------+----------------+----------------+---------------+
| Decision_Tree | 0.9967 | 0.9909 | 0.99314 | 0.9771 |
+---------------+-----------------+----------------+----------------+---------------+
+---------------------+-----------------+----------------+----------------+---------------+
| Model | Train AUC Score | Test AUC Score | Train F1 Score | Test F1 Score |
# Halve the 80% training portion into two stratified sets, d1 and d2 (50-50),
# then give every piece a fresh 0..n-1 index, discarding the old one.
d1, d2, y1, y2 = (
    piece.reset_index(drop=True)
    for piece in train_test_split(
        X_train, y_train, stratify=y_train, test_size=0.5, random_state=15
    )
)
def generating_samples(d1, y1):
"""From this d1,sampling with replacement is done
"""
+--------------------------------+-------------+---------------+
| Custom_Stacking_Implementation | Base Models | Test F1 Score |
+--------------------------------+-------------+---------------+
| | 50 | 0.980001 |
| | 100 | 0.981479 |
| | 150 | 0.982725 |
+--------------------------------+-------------+---------------+