@sjtalkar
Created June 21, 2021 13:56
Creating a Biplot (University of Michigan Masters Unsupervised Learning)
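The snippet assumes a few objects from the course notebook: scaled_features, pca, X_pca, feature_names, and feature_subset_count. A minimal stand-in setup (using the Iris dataset purely for illustration, not the course data) might look like this:

from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Stand-in setup: standardize the features and fit a 2-component PCA,
# keeping the objects the plotting code below expects.
iris = load_iris()
feature_names = iris.feature_names
feature_subset_count = len(feature_names)

scaled_features = StandardScaler().fit_transform(iris.data)
pca = PCA(n_components=2).fit(scaled_features)
X_pca = pca.transform(scaled_features)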
import numpy as np
import matplotlib.pyplot as plt


def biplot(score, coeff, maxdim, pcax, pcay, labels=None):
    """
    This function uses:
    score - the transformed data returned by PCA (the data expressed in the new axes)
    coeff - the loadings from pca.components_
    For the features we are interested in, it plots the correlation between the
    original features and the principal components, using cosine similarity and
    angle measures between axes. It shows how the data is related to the
    ORIGINAL features in the positive and negative directions.
    """
    zoom = 0.5
    pca1 = pcax - 1
    pca2 = pcay - 1
    xs = score[:, pca1]
    ys = score[:, pca2]
    n = min(coeff.shape[0], maxdim)

    # Rescale the scores so the scatter fits inside the +/- zoom window.
    width = 2.0 * zoom
    scalex = width / (xs.max() - xs.min())
    scaley = width / (ys.max() - ys.min())
    text_scale_factor = 1.3

    fig = plt.gcf()
    fig.set_size_inches(9, 9)
    plt.scatter(xs * scalex, ys * scaley, s=9)

    # Draw one arrow (loading vector) per original feature, with its label.
    for i in range(n):
        plt.arrow(0, 0, coeff[i, pca1], coeff[i, pca2],
                  color='b', alpha=0.9, head_width=0.03 * zoom)
        if labels is None:
            plt.text(coeff[i, pca1] * text_scale_factor,
                     coeff[i, pca2] * text_scale_factor,
                     "Var" + str(i + 1), color='g', ha='center', va='center')
        else:
            plt.text(coeff[i, pca1] * text_scale_factor,
                     coeff[i, pca2] * text_scale_factor,
                     labels[i], color='g', ha='center', va='center')

    plt.xlim(-zoom, zoom)
    plt.ylim(-zoom, zoom)
    plt.xlabel("PC{}".format(pcax))
    plt.ylabel("PC{}".format(pcay))
    plt.grid()
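# Optional sanity check (a sketch, not part of the original gist): for
# column-centered data, the correlation between original feature j and
# principal component k equals components_[k, j] * singular_values_[k]
# divided by the norm of column j. This is the quantity the biplot arrows
# are meant to convey. Uses scaled_features, pca, and X_pca from the
# stand-in setup above (or the course notebook).
def check_loading_correlation(scaled_features, pca, X_pca, j=0, k=0):
    empirical = np.corrcoef(scaled_features[:, j], X_pca[:, k])[0, 1]
    analytic = (pca.components_[k, j] * pca.singular_values_[k]
                / np.linalg.norm(scaled_features[:, j]))
    return empirical, analytic  # the two values agree up to rounding error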
# Plot the biplot for the first two principal components. X_pca, pca,
# feature_names, and feature_subset_count come from the course notebook
# (a stand-in setup is sketched above).
plt.figure()
feature_subset = slice(0, feature_subset_count, 1)
biplot(X_pca, np.transpose(pca.components_[0:2, feature_subset]),
       feature_subset_count, 1, 2, labels=feature_names[feature_subset])
print("explained_variance_ratio:", pca.explained_variance_ratio_)
print("sum of explained variance ratios:", np.sum(pca.explained_variance_ratio_))
print("singular values:", pca.singular_values_)
# The variances of the PCs are given by the squares of the singular values of
# the standardized data X*, divided by n - 1: the squared singular values are
# the eigenvalues of X*'X* = (n - 1)S, where S is the covariance matrix of X*
# (i.e. the correlation matrix of X).
print(np.power(pca.singular_values_, 2) / (scaled_features.shape[0] - 1))
print(scaled_features.shape[0])  # n, the number of samples
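# The same relation as an explicit check (assumes the objects above):
# sklearn computes explained_variance_ as singular_values_**2 / (n - 1).
n_samples = scaled_features.shape[0]
assert np.allclose(pca.explained_variance_,
                   pca.singular_values_ ** 2 / (n_samples - 1))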