Last active
December 21, 2023 18:26
-
-
Save raprasad/79265f84e94424fe3409c7022a99b47c to your computer and use it in GitHub Desktop.
PCA snippet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This snippet may be used after installing the following libraries:

# (1) OpenDP library with DP-PCA
#   - reference: https://test.pypi.org/project/opendp/0.9.0.dev20231221001/
#
pip install -i https://test.pypi.org/simple/ opendp==0.9.0.dev20231221001

# (2) scikit-learn (installs numpy as a dependency)
#
pip install scikit-learn
"""
import numpy as np
import opendp.prelude as dp

# Opt in to the OpenDP feature flags this snippet relies on; this must run
# before the PCA model below is constructed.
dp.enable_features("honest-but-curious", "contrib", "floating-point")
# DATASET CREATION
def sample_microdata(*, num_columns=None, num_rows=None, cov=None):
    """Draw column-centered samples from a zero-mean multivariate normal.

    Args:
        num_columns: Number of features. Only used to generate a random
            covariance matrix with ``sample_covariance`` when ``cov`` is
            not supplied.
        num_rows: Number of rows to draw. Falls back to 100,000 when
            omitted (or otherwise falsy).
        cov: Optional square covariance matrix (array-like). When given,
            ``num_columns`` is ignored and the number of features is taken
            from ``cov.shape[0]``.

    Returns:
        A ``(num_rows, n_features)`` array whose per-column sample mean has
        been subtracted.

    Raises:
        ValueError: If neither ``cov`` nor ``num_columns`` is provided.
    """
    # An explicit ``is None`` check is required: ``cov or ...`` evaluates the
    # truth value of the array, which NumPy rejects for multi-element arrays.
    if cov is None:
        if num_columns is None:
            raise ValueError("either `cov` or `num_columns` must be provided")
        cov = sample_covariance(num_columns)
    else:
        # Accept nested lists as well as arrays; ``.shape`` is read below.
        cov = np.asarray(cov)

    microdata = np.random.multivariate_normal(
        np.zeros(cov.shape[0]), cov, size=num_rows or 100_000
    )
    # Remove the empirical column means so the sample itself is centered.
    microdata -= microdata.mean(axis=0)
    return microdata
def sample_covariance(num_features):
    """Generate a random covariance matrix for ``num_features`` features.

    A square matrix with entries drawn uniformly from ``[0, num_features)``
    is multiplied by its own transpose, so the result is symmetric and
    positive semi-definite.

    Args:
        num_features: Side length of the square matrix to generate.

    Returns:
        A ``(num_features, num_features)`` array.
    """
    factor = np.random.uniform(
        0, num_features, size=(num_features, num_features)
    )
    return factor.T @ factor
# USAGE EXAMPLE
num_columns = 4
num_rows = 10_000

model = dp.PCA(
    epsilon=1.,
    row_norm=1.,
    n_samples=num_rows,
    # Use the same column count that the synthetic data is generated with,
    # so the model and the data cannot drift apart.
    n_features=num_columns,
)
# NOTE(review): the sampled rows are not rescaled before fitting; presumably
# the model enforces `row_norm` itself -- confirm against the OpenDP docs.
model.fit(sample_microdata(num_columns=num_columns, num_rows=num_rows))

# EXTRACT RELEASES
print("mean", model.mean_)
print("singular values", model.singular_values_)
print("components", model.components_)

# Element-wise product; NumPy broadcasts the singular values along the last
# axis of `components_`.
# NOTE(review): verify that this axis is the intended one for loadings.
loadings = model.singular_values_ * model.components_
print("loadings", loadings)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment