a-agmon · March 2, 2020 12:57
diff --git a/pu_est3.py b/pu_est3.py

 # Simulate a positive-unlabeled (PU) data set from the labeled frame `data`:
 # keep a random subset of the positives labeled, mark every other row as
 # unlabeled, fit the PU estimator and compare its output with the true labels.
 mod_data = data.copy()
 # Positional row numbers of the positive samples (the last column is the target).
 pos_ind = np.where(mod_data.iloc[:, -1].values == 1)[0]
 # Shuffle so that the positives kept as labeled are a random subset.
 np.random.shuffle(pos_ind)
 # Keep (at most) 150 positives that will remain labeled.
 pos_sample = pos_ind[:150]
 # Create the new target variable and mark the whole data set as unlabeled (-1).
 mod_data['class_test'] = -1
 # pos_sample holds row positions, not index labels, so assign positionally.
 # Label-based .loc is only equivalent when the index is exactly 0..n-1; on a
 # filtered or shuffled frame it would label the wrong rows or fail.
 mod_data.iloc[pos_sample, mod_data.columns.get_loc('class_test')] = 1
 print('target variable:\n', mod_data.iloc[:, -1].value_counts())
 # Column layout is now [features..., original target, class_test].
 # x_data drops both target columns; the original target is kept in mod_data
 # only so the predictions can be compared with the true labels.
 x_data = mod_data.iloc[:, :-2].values
 y_labeled = mod_data.iloc[:, -1].values
 y_positive = mod_data.iloc[:, -2].values
 # PU estimation.
 # NOTE(review): 0.2 is passed as the third argument - presumably a hold-out
 # ratio; confirm against fit_PU_estimator.
 pu_estimator, probs1y1 = fit_PU_estimator(x_data, y_labeled, 0.2, xgb.XGBClassifier())
 y_predict = predict_PU_prob(x_data, pu_estimator, probs1y1)
 # Threshold the predicted values at 0.5 to obtain hard 0/1 predictions.
 y_predict = [1 if prob > 0.5 else 0 for prob in y_predict]
 evaluate_results(y_positive, y_predict)

	# Simulate a positive-unlabeled (PU) data set from the labeled frame `data`:
	# keep a random subset of the positives labeled, mark every other row as
	# unlabeled, fit the PU estimator and compare its output with the true labels.
	mod_data = data.copy()
	# Positional row numbers of the positive samples (the last column is the target).
	pos_ind = np.where(mod_data.iloc[:, -1].values == 1)[0]
	# Shuffle so that the positives kept as labeled are a random subset.
	np.random.shuffle(pos_ind)
	# Keep (at most) 150 positives that will remain labeled.
	pos_sample = pos_ind[:150]
	# Create the new target variable and mark the whole data set as unlabeled (-1).
	mod_data['class_test'] = -1
	# pos_sample holds row positions, not index labels, so assign positionally.
	# Label-based .loc is only equivalent when the index is exactly 0..n-1; on a
	# filtered or shuffled frame it would label the wrong rows or fail.
	mod_data.iloc[pos_sample, mod_data.columns.get_loc('class_test')] = 1
	print('target variable:\n', mod_data.iloc[:, -1].value_counts())
	# Column layout is now [features..., original target, class_test].
	# x_data drops both target columns; the original target is kept in mod_data
	# only so the predictions can be compared with the true labels.
	x_data = mod_data.iloc[:, :-2].values
	y_labeled = mod_data.iloc[:, -1].values
	y_positive = mod_data.iloc[:, -2].values
	# PU estimation.
	# NOTE(review): 0.2 is passed as the third argument - presumably a hold-out
	# ratio; confirm against fit_PU_estimator.
	pu_estimator, probs1y1 = fit_PU_estimator(x_data, y_labeled, 0.2, xgb.XGBClassifier())
	y_predict = predict_PU_prob(x_data, pu_estimator, probs1y1)
	# Threshold the predicted values at 0.5 to obtain hard 0/1 predictions.
	y_predict = [1 if prob > 0.5 else 0 for prob in y_predict]
	evaluate_results(y_positive, y_predict)