lorenzoriano · November 15, 2017 01:18
diff --git a/test_ambiguous_lr.py b/test_ambiguous_lr.py
 import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
 import numpy as np

 # Synthetic 1-D data: a sinusoid with a linear trend plus unit Gaussian noise,
 # evaluated at NSAMPLE points drawn uniformly from [-10.5, 10.5).
 # Every array below is a column vector of shape (NSAMPLE, 1).
 NSAMPLE = 5000
 x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
 r_data = np.float32(np.random.normal(size=(NSAMPLE, 1)))
 y_data = np.float32(np.sin(0.75 * x_data) * 7.0 + x_data * 0.5 + r_data * 1.0)
 # After the swap the noisy curve value is the feature and the uniform sample
 # is the quantity being thresholded into a label.
 x_data, y_data = y_data, x_data  # swap x and y
 X = x_data
 # Binary label: 1.0 where the swapped y_data exceeds 5, otherwise 0.0.
 # Builtin float is used because the np.float alias was removed in NumPy 1.24.
 y = (y_data > 5).astype(float)

 # First figure: the data coloured by its true label.
 positive_indexes = y == 1
 negative_indexes = y == 0
 plt.plot(x_data[negative_indexes], y_data[negative_indexes], '.', label='Negative')
 plt.plot(x_data[positive_indexes], y_data[positive_indexes], '.', label='Positive')
 plt.legend(loc="best")
 plt.title("Real")

 # Hold out 10% of the samples for testing.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
 # LogisticRegression's C is the INVERSE regularization strength, so this huge
 # value effectively switches regularization off.
 regularization = 1e60
 # Errors on class 1 are weighted twice as much as errors on class 0.
 class_weight = {0: 1, 1: 2}
 clf = LogisticRegression(
     solver='liblinear',
     tol=1e-10,
     max_iter=10000,
     C=regularization,
     class_weight=class_weight,
 )
 # fit() expects a 1-D target, hence the ravel() of the column vector.
 clf.fit(X_train, y_train.ravel())
 y_predicted = clf.predict(X)

 # Single-argument print(...) calls behave identically on Python 2 and 3;
 # the "{} {}" template reproduces the separator the print statement inserted.
 print("Coeff: {}, Intercept: {}".format(clf.coef_, clf.intercept_))
 print("{} {}".format("Score over training: ", clf.score(X_train, y_train)))
 print("{} {}".format("Score over testing: ", clf.score(X_test, y_test)))
 print("{} {}".format("Score total: ", clf.score(X, y)))
 # mean() of the 0/1 labels is the class-1 fraction as a scalar (the builtin
 # sum() over a column vector produced a one-element array instead).
 print("{} {}".format("Real percent of class 1: ", float(y.mean())))
 print("{} {}".format("Predicted percent of class 1: ", float(np.mean(y_predicted == 1))))

 # Second figure: the same points coloured by the classifier's prediction.
 positive_indexes = y_predicted == 1
 negative_indexes = y_predicted == 0
 plt.figure()
 plt.plot(x_data[negative_indexes], y_data[negative_indexes], '.', label='Negative')
 plt.plot(x_data[positive_indexes], y_data[positive_indexes], '.', label='Positive')
 plt.legend(loc="best")
 plt.title("Classifier")
 plt.show()
	import matplotlib.pyplot as plt
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LogisticRegression
	import numpy as np

	# Synthetic 1-D data: a sinusoid with a linear trend plus unit Gaussian noise,
	# evaluated at NSAMPLE points drawn uniformly from [-10.5, 10.5).
	# Every array below is a column vector of shape (NSAMPLE, 1).
	NSAMPLE = 5000
	x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
	r_data = np.float32(np.random.normal(size=(NSAMPLE, 1)))
	# The multiplication operators had been stripped from this expression,
	# leaving it unparseable; they are restored here.
	y_data = np.float32(np.sin(0.75 * x_data) * 7.0 + x_data * 0.5 + r_data * 1.0)
	# After the swap the noisy curve value is the feature and the uniform sample
	# is the quantity being thresholded into a label.
	x_data, y_data = y_data, x_data  # swap x and y
	X = x_data
	# Binary label: 1.0 where the swapped y_data exceeds 5, otherwise 0.0.
	# Builtin float is used because the np.float alias was removed in NumPy 1.24.
	y = (y_data > 5).astype(float)

	# First figure: the data coloured by its true label.
	positive_indexes = y == 1
	negative_indexes = y == 0
	plt.plot(x_data[negative_indexes], y_data[negative_indexes], '.', label='Negative')
	plt.plot(x_data[positive_indexes], y_data[positive_indexes], '.', label='Positive')
	plt.legend(loc="best")
	plt.title("Real")

	# Hold out 10% of the samples for testing.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
	# LogisticRegression's C is the INVERSE regularization strength, so this huge
	# value effectively switches regularization off.
	regularization = 1e60
	# Errors on class 1 are weighted twice as much as errors on class 0.
	class_weight = {0: 1, 1: 2}
	clf = LogisticRegression(
	    solver='liblinear',
	    tol=1e-10,
	    max_iter=10000,
	    C=regularization,
	    class_weight=class_weight,
	)
	# fit() expects a 1-D target, hence the ravel() of the column vector.
	clf.fit(X_train, y_train.ravel())
	y_predicted = clf.predict(X)

	# Single-argument print(...) calls behave identically on Python 2 and 3;
	# the "{} {}" template reproduces the separator the print statement inserted.
	print("Coeff: {}, Intercept: {}".format(clf.coef_, clf.intercept_))
	print("{} {}".format("Score over training: ", clf.score(X_train, y_train)))
	print("{} {}".format("Score over testing: ", clf.score(X_test, y_test)))
	print("{} {}".format("Score total: ", clf.score(X, y)))
	# mean() of the 0/1 labels is the class-1 fraction as a scalar (the builtin
	# sum() over a column vector produced a one-element array instead).
	print("{} {}".format("Real percent of class 1: ", float(y.mean())))
	print("{} {}".format("Predicted percent of class 1: ", float(np.mean(y_predicted == 1))))

	# Second figure: the same points coloured by the classifier's prediction.
	positive_indexes = y_predicted == 1
	negative_indexes = y_predicted == 0
	plt.figure()
	plt.plot(x_data[negative_indexes], y_data[negative_indexes], '.', label='Negative')
	plt.plot(x_data[positive_indexes], y_data[positive_indexes], '.', label='Positive')
	plt.legend(loc="best")
	plt.title("Classifier")
	plt.show()