kusal1990’s gists

kusal1990 / Prediction on the test dataset.py

Created June 2, 2022 18:33

	# Fit a single XGBoost classifier on the training data.
	model = xgb.XGBClassifier(
	    learning_rate=0.5,
	    max_depth=4,
	    n_estimators=10,
	    reg_alpha=0.01,
	    reg_lambda=1.0,
	    random_state=42,
	)
	model = model.fit(X_train, y_train)

	# One row per test sample, one column per prediction pass.
	y_test_probas = np.empty((X_test.shape[0], 5))

	# The fitted model does not change between passes, so the probabilities of
	# the class at column index 1 are computed once and reused for every column.
	positive_proba = model.predict_proba(X_test)[:, 1]
	for i in range(5):
	    y_test_probas[:, i] = positive_proba

	# Take the mean of the predicted probabilities across the 5 columns.
	y_test_proba = np.mean(y_test_probas, axis=1)

kusal1990 / XGBoost_Classifier.py

Created June 2, 2022 18:31

	# Hyperparameter search space for the XGBoost model
	params = {
	    'n_estimators': [10, 50, 100, 500, 1000],
	    'max_depth': [2, 3, 4, 5, 6],
	    'learning_rate': [0.0001, 0.005, 0.001, 0.05, 0.1],
	    'reg_alpha': [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2],
	    'reg_lambda': [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the XGBoost model during hyperparameter tuning; higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

kusal1990 / AdaBoostClassifier.py

Created June 2, 2022 18:30

	# Hyperparameter search space for the AdaBoost model
	params = {
	    'n_estimators': [10, 50, 100, 500, 1000],
	    'learning_rate': [0.0001, 0.005, 0.001, 0.05, 0.1],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the AdaBoost model during hyperparameter tuning; higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

	# AdaBoost classifier with a fixed seed for reproducibility
	ada_clf = AdaBoostClassifier(random_state=10)

kusal1990 / Random_Forest_Classifier.py

Last active June 2, 2022 18:28

	# Hyperparameter search space for the random forest model
	params = {
	    'n_estimators': [10, 50, 100, 500, 1000],
	    'max_depth': [2, 3, 4, 5, 6],
	    'min_samples_split': [0.02, 0.04, 0.08, 0.16, 0.32, 0.50],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the random forest model during hyperparameter tuning; higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

	# Create a random forest classifier object

kusal1990 / DT.py

Created June 2, 2022 18:27


	# Hyperparameter search space for the decision tree model
	params = {
	    'max_depth': [1, 5, 10, 50],
	    'min_samples_split': [5, 10, 100, 500],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the decision tree model during hyperparameter tuning; higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

	# Decision tree classifier with balanced class weights and a fixed seed
	dt_clf = tree.DecisionTreeClassifier(random_state=42, class_weight='balanced')

kusal1990 / Kernel_SVM.py

Created June 2, 2022 18:26

	# Hyperparameter search space for the RBF-kernel SVM: C and gamma each take
	# the powers of ten from 10**-5 to 10**2.
	params = {
	    'C': [10 ** x for x in range(-5, 3)],
	    'gamma': [10 ** x for x in range(-5, 3)],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the SVM during hyperparameter tuning; higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

	# RBF-kernel SVM classifier with balanced class weights and a fixed seed
	svm_clf = svm.SVC(random_state=42, kernel='rbf', class_weight='balanced')

kusal1990 / svm.py

Created June 2, 2022 18:25

	# Hyperparameter search space for the SVM: C takes the powers of ten
	# from 10**-5 to 10**2.
	params = {
	    'C': [10 ** exponent for exponent in range(-5, 3)],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the SVM during hyperparameter tuning; higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

	# SVM classifier with balanced class weights and a fixed seed
	svm_clf = svm.SVC(random_state=42, class_weight='balanced')

kusal1990 / Logistic_Regression.py

Created June 2, 2022 18:24

	# Hyperparameter search space for the logistic regression model: C takes
	# the powers of ten from 10**-5 to 10**5.
	params = {
	    'C': [10 ** exponent for exponent in range(-5, 6)],
	}

	# Scorer built from the Matthews correlation coefficient (MCC), used to
	# evaluate the logistic regression model during hyperparameter tuning;
	# higher is better.
	mcc = make_scorer(matthews_corrcoef, greater_is_better=True)

	# Logistic regression classifier with balanced class weights and a fixed seed
	log_clf = LogisticRegression(random_state=42, class_weight='balanced')

kusal1990 / data_processing.py

Created June 2, 2022 18:23

	def data_preparation(start, end,praq_train):
	# load a piece of data from file
	praq_train = pq.read_pandas('/content/train.parquet', columns=[str(i) for i in range(start, end)]).to_pandas()
	X = []
	y = []
	# wrap the loop in tqdm to display progress and processing time
	# iterate over the id_measurement values of df_metadata_train, from start to end
	# start and end are divided by 3 because each id_measurement has 3 id_signal values, and the start/end parameters are id_signal indices
	for id_measurement in tqdm(df_metadata_train.index.levels[0].unique()[int(start/3):int(end/3)]):
	X_signal = []

kusal1990 / transform_signal.py

Created June 2, 2022 18:21

	def transform_signal(signal, n_dim=160, min_max=(-1,1)):
	# convert data into -1 to 1
	signal_std = standardize_data(signal, min_data=min_num, max_data=max_num)
	# bucket or chunk size, 5000 in this case (800000 / 160)
	bucket_size = int(800000 / n_dim)
	# new_signal will be the container of the new data
	new_signal = []
	# this loop iterates over each chunk/bucket until the whole sample size (800000) is covered
	for i in range(0, 800000 , bucket_size):
	# cut each bucket to ts_range

KUSAL BERA kusal1990