Ridge CCA
import numpy as np
from sklearn.utils.extmath import randomized_svd


def partial_whiten(X, alpha, eigval_tol=1e-7):
    """
    Return regularized whitening transform for a matrix X.

    Parameters
    ----------
    X : ndarray
        Matrix with shape `(m, n)` holding `m` observations
        in `n`-dimensional feature space. Columns of `X` are
        expected to be mean-centered so that `X.T @ X` is
        proportional to the covariance matrix.
    alpha : float
        Regularization parameter, `0 <= alpha <= 1`.
    eigval_tol : float
        Eigenvalues of the covariance matrix are clipped to
        this minimum value.

    Returns
    -------
    X_whitened : ndarray
        Transformed data matrix.
    Zx : ndarray
        Matrix implementing the whitening transformation,
        `X_whitened = X @ Zx`.
    """
    # Regularized covariance: (1 - alpha) * X'X + alpha * I.
    XtX = (1 - alpha) * (X.T @ X)
    XtX[np.diag_indices_from(XtX)] += alpha

    # Eigendecomposition, clipping small eigenvalues for stability.
    w, v = np.linalg.eigh(XtX)
    w[w < eigval_tol] = eigval_tol  # clip minimum eigenvalue

    # Matrix holding the whitening transformation (the inverse
    # matrix square root of the regularized covariance).
    Zx = np.linalg.multi_dot((v, np.diag(1 / np.sqrt(w)), v.T))

    # Return (partially) whitened data and whitening matrix.
    return X @ Zx, Zx
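
# --- Illustrative sanity check (editor's addition, not in the original gist).
# With alpha = 0, `partial_whiten` multiplies by the inverse square root of
# X'X, so the whitened data should have an identity Gram matrix. The name
# `_check_whitening` is hypothetical; it is called from the demo block at the
# bottom of this file.
def _check_whitening():
    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    Xc = X - X.mean(axis=0)  # mean-center, as the docstring requires
    Xw, Zx = partial_whiten(Xc, alpha=0.0)
    assert np.allclose(Xw.T @ Xw, np.eye(5), atol=1e-6)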
class RidgeCCA:

    def __init__(
            self, n_components=2, alpha=0.0,
            center_data=True, svd_args=dict()):
        """
        n_components : int, (default 2)
            Number of components to keep.
        alpha : float within the interval [0, 1], (default 0.0)
            Strength of regularization on a scale between zero
            (unregularized CCA) and one (Partial Least Squares).
        center_data : bool, (default True)
            If True, mean-center the columns of X and Y in `fit`
            and subtract the stored means in `transform`.
        svd_args : dict
            Specifies parameters for the truncated SVD solver
            (see sklearn.utils.extmath.randomized_svd).
        """
        self.n_components = n_components
        self.alpha = alpha
        self.center_data = center_data
        self._svd_args = svd_args
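
    # Note (editor's addition): the alpha endpoints follow from
    # `partial_whiten` above. With alpha = 0 the whitening matrices are the
    # inverse square roots of X'X and Y'Y, recovering classical CCA; with
    # alpha = 1 the regularized covariance is the identity, whitening is a
    # no-op, and `fit` reduces to an SVD of X'Y (i.e., PLS-SVD).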
    def fit(self, X, Y):
        """Fit model to data."""

        # Mean-center data.
        if self.center_data:
            self.x_mean_ = x_mean = np.mean(X, axis=0)
            self.y_mean_ = y_mean = np.mean(Y, axis=0)
            Xc = X - x_mean[None, :]
            Yc = Y - y_mean[None, :]
        else:
            self.x_mean_ = None
            self.y_mean_ = None
            Xc, Yc = X, Y

        # Partially whiten both datasets.
        Xw, Zx = partial_whiten(Xc, self.alpha)
        Yw, Zy = partial_whiten(Yc, self.alpha)

        # Compute SVD of cross-covariance matrix.
        Xw_t_Yw = Xw.T @ Yw
        U, S, Vt = randomized_svd(
            Xw_t_Yw, self.n_components, **self._svd_args)

        # Undo the whitening transformation to obtain the
        # transformations on X and Y.
        self.x_weights_ = Zx @ U
        self.y_weights_ = Zy @ Vt.T

        # Return self, following the sklearn estimator convention.
        return self
    def transform(self, X, Y):
        """Apply the dimension reduction learned on the train data."""
        if self.center_data:
            return (
                (X - self.x_mean_[None, :]) @ self.x_weights_,
                (Y - self.y_mean_[None, :]) @ self.y_weights_
            )
        else:
            return X @ self.x_weights_, Y @ self.y_weights_

    def fit_transform(self, X, Y):
        """Learn and apply the dimension reduction on the train data."""
        self.fit(X, Y)
        return self.transform(X, Y)

    def canon_corrs(self, X, Y):
        """Return the canonical correlation coefficients."""
        tX, tY = self.transform(X, Y)
        denom = np.linalg.norm(tX, axis=0) * np.linalg.norm(tY, axis=0)
        return np.sum(tX * tY, axis=0) / denom
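
# --- Usage sketch (editor's addition, not in the original gist). Fit RidgeCCA
# to two noisy views of a shared 2-dimensional latent signal and check that
# the leading canonical correlations are close to one. All names and
# parameter values below are hypothetical.
if __name__ == "__main__":
    _check_whitening()

    rng = np.random.default_rng(0)
    Z = rng.normal(size=(500, 2))  # shared latent signal
    X = Z @ rng.normal(size=(2, 10)) + 0.1 * rng.normal(size=(500, 10))
    Y = Z @ rng.normal(size=(2, 8)) + 0.1 * rng.normal(size=(500, 8))

    model = RidgeCCA(n_components=2, alpha=0.1)
    tX, tY = model.fit_transform(X, Y)  # (500, 2) projections of each view
    print(model.canon_corrs(X, Y))      # both coefficients close to 1.0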