# --> Import standard Python libraries.
import numpy as np
from scipy.special import expit
from scipy.linalg import norm

# --> Import sklearn utility functions.
from sklearn.base import BaseEstimator, ClassifierMixin

class LogisticRegression_GD(BaseEstimator, ClassifierMixin):
    """
    Implementation of logistic regression. Minimization is performed
    by gradient descent. Note that we assume a column of ones (bias term)
    has been prepended to X for the sake of simplicity.
    """

    def __init__(self, maxiter=1000, tol=1e-6):
        # --> Maximum number of iterations.
        self.maxiter = maxiter

        # --> Tolerance for the optimizer.
        self.tol = tol

    def predict(self, X):
        # --> Round the predicted probabilities to get 0/1 class labels.
        return np.rint(self.predict_proba(X)).astype(int)

    def predict_proba(self, X):
        # --> Sigmoid of the linear model output.
        return expit(X @ self.weights)

    def fit(self, X, y):
        """
        Implementation of the gradient descent method with optimal
        learning rate following [1].

        INPUT
        -----
        X : numpy 2D array. Each row corresponds to one training example.
            It is assumed that the first column is a column of ones (bias).

        y : numpy 1D array. Label (0 or 1) of each example.

        OUTPUT
        ------
        self : The trained logistic regression model.

        References
        ----------
        [1] R. Yedida & S. Saha. LipschitzLR: Using theoretically computed
            adaptive learning rates for fast convergence. arXiv eprint 1902.07399.
        """
        # --> Number of examples and features.
        m, n = X.shape

        # --> Initialize the weights.
        self.weights = np.zeros((n,))

        # --> Compute the optimal learning rate (see [1]).
        alpha = 2*m / norm(X)

        # --> Training using gradient descent and the optimal stepsize.
        for _ in range(self.maxiter):
            # --> Compute the gradient.
            grad = X.T @ (self.predict_proba(X) - y) / m

            # --> Update the weights.
            self.weights -= alpha * grad

            # --> Check for convergence.
            if norm(grad)**2 < self.tol:
                break

        return self
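

# ------------------------------------------------------------------
# Minimal usage sketch: assumes scikit-learn's make_classification is
# available for synthetic data, and illustrates that a column of ones
# (bias term) must be prepended to X before calling fit.
# ------------------------------------------------------------------
if __name__ == "__main__":
    from sklearn.datasets import make_classification

    # --> Generate a small synthetic binary classification problem.
    X, y = make_classification(n_samples=200, n_features=4, random_state=0)

    # --> Prepend the column of ones acting as the bias term.
    X = np.hstack([np.ones((X.shape[0], 1)), X])

    # --> Fit the model and report the training accuracy.
    model = LogisticRegression_GD(maxiter=1000, tol=1e-6).fit(X, y)
    accuracy = np.mean(model.predict(X) == y)
    print("Training accuracy:", accuracy)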