Created
March 2, 2020 12:57
-
-
Save a-agmon/a1512dde3e66c037aed25291be8656d2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a positive-unlabeled (PU) version of `data`: keep a small random subset
# of the known positives labeled (1) and mark every other row as unlabeled (-1),
# then fit the PU estimator and compare its predictions with the true labels.

# Number of positive samples that keep their label in the PU setting.
N_LABELED_POSITIVES = 150

# Work on a copy so the original frame is left untouched.
mod_data = data.copy()

# Positional indices of the positive samples (the last column is the target).
pos_ind = np.where(mod_data.iloc[:, -1].values == 1)[0]

# Shuffle in place so the labeled subset is a random draw of the positives.
np.random.shuffle(pos_ind)

# Positives that will remain labeled (all of them if fewer are available).
pos_sample = pos_ind[:N_LABELED_POSITIVES]

# Create the new target variable and mark the whole data set as unlabeled.
mod_data['class_test'] = -1

# Label just the positive samples we have put aside.
# `pos_sample` holds *positions*, so index positionally: label-based `.loc`
# would hit the wrong rows whenever the index is not the default 0..n-1.
mod_data.iloc[pos_sample, mod_data.columns.get_loc('class_test')] = 1
print('target variable:\n', mod_data.iloc[:, -1].value_counts())

# `mod_data` still contains the former target variable (second-to-last
# column, assuming 'class_test' was appended as a new last column); it is
# excluded from the features and kept only to compare the results.
x_data = mod_data.iloc[:, :-2].values
y_labeled = mod_data.iloc[:, -1].values
y_positive = mod_data.iloc[:, -2].values

# PU estimation.
# NOTE(review): 0.2 is presumably a hold-out ratio -- confirm against
# fit_PU_estimator's signature.
pu_estimator, probs1y1 = fit_PU_estimator(x_data, y_labeled, 0.2, xgb.XGBClassifier())
y_predict = predict_PU_prob(x_data, pu_estimator, probs1y1)

# Threshold the scores at 0.5 to obtain hard 0/1 predictions.
y_predict = [1 if prob > 0.5 else 0 for prob in y_predict]
evaluate_results(y_positive, y_predict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment