def transform_signal(signal, n_dim=160, min_max=(-1, 1)):
    # Rescale the raw signal into the min_max range; min_num and max_num are
    # global extremes computed from the training signals beforehand
    signal_std = standardize_data(signal, min_data=min_num, max_data=max_num,
                                  range_needed=min_max)
    # Bucket (chunk) size: 5000 samples here (800000 / 160)
    bucket_size = int(800000 / n_dim)
    # new_signal collects the per-bucket features
    new_signal = []
    # Walk bucket by bucket over the whole 800000-sample signal
    for i in range(0, 800000, bucket_size):
        # Cut the current bucket out of the standardized signal
        signal_range = signal_std[i:i + bucket_size]
        # Summarize each bucket with simple statistics (the snippet truncates
        # here; mean/std/min/max is a minimal feature choice)
        new_signal.append([signal_range.mean(), signal_range.std(),
                           signal_range.min(), signal_range.max()])
    return np.asarray(new_signal)
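transform_signal relies on a standardize_data helper that isn't shown in the snippet. A minimal sketch, under the assumption that min_data/max_data are the global extremes of the raw training signals and range_needed is the target interval:

import numpy as np

def standardize_data(signal, min_data, max_data, range_needed=(-1, 1)):
    # Map values linearly from [min_data, max_data] onto range_needed
    scaled = (signal - min_data) / (max_data - min_data)
    return scaled * (range_needed[1] - range_needed[0]) + range_needed[0]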
def data_preparation(start, end):
    # Load a slice of columns from the parquet file (one column per signal id)
    praq_train = pq.read_pandas('/content/train.parquet',
                                columns=[str(i) for i in range(start, end)]).to_pandas()
    X = []
    y = []
    # tqdm reports progress over the loop.
    # Each id_measurement groups 3 signals (one per phase), while start/end
    # index individual signals, hence the division by 3.
    for id_measurement in tqdm(df_metadata_train.index.levels[0].unique()[int(start / 3):int(end / 3)]):
        X_signal = []
        # Transform each of the three phase signals and stack them side by side.
        # The metadata is assumed to be indexed by (id_measurement, phase) with
        # signal_id and target columns; the snippet truncates at this point.
        for phase in range(3):
            signal_id, target = df_metadata_train.loc[id_measurement].loc[phase]
            X_signal.append(transform_signal(praq_train[str(signal_id)]))
        X.append(np.concatenate(X_signal, axis=1))
        y.append(target)
    return np.asarray(X), np.asarray(y)
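A possible driver for this function processes the signal columns in chunks so the full parquet file never sits in memory at once. A sketch, with illustrative chunk boundaries (the VSB training set has 8712 signals):

X_parts, y_parts = [], []
# Process the signal columns in two halves (boundaries are illustrative)
for start, end in [(0, 4356), (4356, 8712)]:
    X_part, y_part = data_preparation(start, end)
    X_parts.append(X_part)
    y_parts.append(y_part)
X_all = np.concatenate(X_parts)
y_all = np.concatenate(y_parts)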
# Parameters to tune for the logistic regression model
params = {'C': [10 ** x for x in range(-5, 6)]}
# Custom Matthews correlation coefficient (MCC) scorer for evaluating model
# performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# Logistic regression classifier; class_weight='balanced' compensates for the
# skewed class distribution
log_clf = LogisticRegression(random_state=42, class_weight='balanced')
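The search step itself isn't shown in these snippets. A minimal sketch of how params, mcc, and log_clf would plug into scikit-learn's GridSearchCV, assuming X_all/y_all from the preparation step above (each sample's 2-D feature block is flattened into one row):

from sklearn.model_selection import GridSearchCV

# Exhaustive search over C, scored by MCC with 5-fold cross-validation
grid = GridSearchCV(log_clf, params, scoring=mcc, cv=5, n_jobs=-1)
grid.fit(X_all.reshape(X_all.shape[0], -1), y_all)
print(grid.best_params_, grid.best_score_)

The same pattern applies to every model below; only the estimator and the parameter grid change.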
# Parameters to tune for the SVM model
params = {'C': [10 ** x for x in range(-5, 3)]}
# Custom MCC scorer for evaluating performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# SVM classifier with balanced class weights
svm_clf = svm.SVC(random_state=42, class_weight='balanced')
# Parameters to tune for the RBF-kernel SVM model
params = {'C': [10 ** x for x in range(-5, 3)],
          'gamma': [10 ** x for x in range(-5, 3)]}
# Custom MCC scorer for evaluating performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# RBF-kernel SVM classifier with balanced class weights
svm_clf = svm.SVC(random_state=42, kernel='rbf', class_weight='balanced')
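With two hyperparameters the grid is two-dimensional (8 × 8 = 64 combinations here), and it helps to see how C and gamma interact rather than just reading off the best pair. A sketch, reusing the names above and the flattened X_all/y_all:

import pandas as pd
from sklearn.model_selection import GridSearchCV

# Fit the 8 x 8 C/gamma grid, then pivot mean test MCC into a table
grid = GridSearchCV(svm_clf, params, scoring=mcc, cv=5, n_jobs=-1)
grid.fit(X_all.reshape(X_all.shape[0], -1), y_all)
heat = pd.DataFrame(grid.cv_results_).pivot_table(
    index='param_C', columns='param_gamma', values='mean_test_score')
print(heat.round(3))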
# Parameters to tune for the decision tree model
params = {'max_depth': [1, 5, 10, 50],
          'min_samples_split': [5, 10, 100, 500]}
# Custom MCC scorer for evaluating performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# Decision tree classifier with balanced class weights
dt_clf = tree.DecisionTreeClassifier(random_state=42, class_weight='balanced')
# Parameters to tune for the random forest model
params = {'n_estimators': [10, 50, 100, 500, 1000],
          'max_depth': [2, 3, 4, 5, 6],
          'min_samples_split': [0.02, 0.04, 0.08, 0.16, 0.32, 0.50]}
# Custom MCC scorer for evaluating performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# Random forest classifier with balanced class weights (the snippet truncates
# here; the constructor follows the pattern of the other models)
rf_clf = RandomForestClassifier(random_state=42, class_weight='balanced')
# Parameters to tune for the AdaBoost model
params = {'n_estimators': [10, 50, 100, 500, 1000],
          'learning_rate': [0.0001, 0.005, 0.001, 0.05, 0.1]}
# Custom MCC scorer for evaluating performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# AdaBoost classifier
ada_clf = AdaBoostClassifier(random_state=10)
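In boosting, learning_rate and n_estimators trade off against each other, and AdaBoost's staged_predict makes it cheap to score every intermediate number of rounds without refitting. A sketch, assuming a fitted ada_clf and a held-out split X_val/y_val:

import numpy as np
from sklearn.metrics import matthews_corrcoef

# MCC on the validation split after each boosting round
staged_mcc = [matthews_corrcoef(y_val, y_pred)
              for y_pred in ada_clf.staged_predict(X_val)]
best_rounds = int(np.argmax(staged_mcc)) + 1
print(best_rounds, max(staged_mcc))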
# Parameters to tune for the XGBoost model
params = {'n_estimators': [10, 50, 100, 500, 1000],
          'max_depth': [2, 3, 4, 5, 6],
          'learning_rate': [0.0001, 0.005, 0.001, 0.05, 0.1],
          'reg_alpha': [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2],
          'reg_lambda': [1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2]}
# Custom MCC scorer for evaluating performance during hyperparameter tuning
mcc = make_scorer(matthews_corrcoef, greater_is_better=True)
# XGBoost classifier with log loss as the objective to minimize (the snippet
# truncates here; the constructor follows the pattern of the other models)
xgb_clf = xgb.XGBClassifier(random_state=42)
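This grid has 5 × 5 × 5 × 6 × 6 = 4500 combinations, so an exhaustive search multiplied by cross-validation folds gets expensive. A randomized search over the same space is a common substitute; a sketch reusing the names above:

from sklearn.model_selection import RandomizedSearchCV

# Sample 50 of the 4500 combinations instead of trying all of them
search = RandomizedSearchCV(xgb_clf, params, n_iter=50, scoring=mcc,
                            cv=5, random_state=42, n_jobs=-1)
search.fit(X_all.reshape(X_all.shape[0], -1), y_all)
print(search.best_params_, search.best_score_)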
# Average positive-class probabilities over a small ensemble of 5 models
y_test_probas = np.empty((X_test.shape[0], 5))
for i in range(5):
    # subsample < 1 and a varying seed make each ensemble member see a
    # different bootstrap of rows, so the 5 prediction columns actually differ
    model = xgb.XGBClassifier(learning_rate=0.5, max_depth=4, n_estimators=10,
                              reg_alpha=0.01, reg_lambda=1.0,
                              subsample=0.8, random_state=i)
    model.fit(X_train, y_train)
    y_test_probas[:, i] = model.predict_proba(X_test)[:, 1]
# Take the mean of the 5 predicted probability columns per test sample
y_test_proba = np.mean(y_test_probas, axis=1)
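MCC is computed on hard labels, so the averaged probabilities still need a decision threshold. A common approach is to scan candidate thresholds on a validation split and keep the MCC-maximizing one; a sketch, assuming y_val and validation probabilities y_val_proba from a held-out split:

import numpy as np
from sklearn.metrics import matthews_corrcoef

# Scan thresholds on the validation split and keep the best one by MCC
thresholds = np.linspace(0.1, 0.9, 81)
scores = [matthews_corrcoef(y_val, (y_val_proba > t).astype(int))
          for t in thresholds]
best_t = thresholds[int(np.argmax(scores))]
# Apply the chosen threshold to the averaged test probabilities
y_test_pred = (y_test_proba > best_t).astype(int)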