#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 21 08:29:57 2017
@author: tp53
"""
import pandas as pd
import numpy as np

# read data and data wrangling
# a row for the response was added to the csv
# metf = metformin, n = normalized, r = raw data from csv,
# before doing transpose (T) and other manipulations
metf_n_r_data = pd.read_csv("./data/metf_n_resp.csv")
thiog_n_data = pd.read_csv("./data/THIOG_normalized.csv")
res_csv = pd.read_csv('./data/response.csv')
res_data = res_csv['METFORMIN']
# build the target
target = res_data
# transpose metf_n_r_data: features in columns, samples (patient data) in rows
metf_n_r_t_data = metf_n_r_data.T
# drop the target column and keep the rest of the sample data, e.g.
# predictors = metf_n_data.drop(['response'], axis=1).to_numpy()
# work on a copy of metf_n_r_t_data (shorter name, safe for experiments)
df0 = metf_n_r_t_data.copy(deep=True)
df_no_r = df0.drop(df0.columns[[0]], axis=1)
# as_matrix() is deprecated; to_numpy() is the current equivalent
predictors = df_no_r.to_numpy()
# the first row of predictors appears to hold labels rather than numeric
# values, so drop it before doing any numeric work
preds_wo_labels = predictors[1:]
# np.matrix is deprecated; a plain float ndarray does the same job
preds_wo_labels = np.asarray(preds_wo_labels, dtype=float)
'''
preds_wo_labels.head() won't work: preds_wo_labels is a numpy.ndarray,
not a DataFrame, so wrap it in a DataFrame for convenience
'''
preds_wo_labels_df = pd.DataFrame(data=preds_wo_labels)
preds_wo_labels
# covariance between samples: np.cov treats each row as a variable,
# so this is an n_samples x n_samples matrix
cov_matrix = np.cov(preds_wo_labels)
'''
let's calculate eigenvalues and eigenvectors of the covariance matrix
'''
eig_vals, eig_vecs = np.linalg.eig(cov_matrix)
eig_vals
#eig_vecs
preds_wo_labels[0]
target
# pair each eigenvalue with its eigenvector
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
# np.linalg.eig does not guarantee ordering, so sort by decreasing eigenvalue
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
eig_pairs[0][0]
# correlate the leading eigenvector with the response
a = eig_pairs[0][1]
resp_np = np.array(target)
np.corrcoef(a, resp_np)
# mean and standard deviation of the first feature column
c1 = preds_wo_labels[:, 0].T
c0 = np.mean(preds_wo_labels[:, 0].T)
c0_std = np.std(c1)
preds_wo_labels_df.head()
# visually confirm that the list is sorted by decreasing eigenvalues
print('Eigenvalues in descending order:')
for i in eig_pairs:
    print(i)
'''
http://blog.districtdatalabs.com/principal-component-analysis-with-python
To get a better idea of how the principal components describe the variance
in the data, we will look at the explained variance ratio of the first
two principal components.
'''
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca
# fit on the unstandardized feature frame and report the variance explained
pca.fit_transform(preds_wo_labels_df)
print(pca.explained_variance_ratio_)
# explained variance of all components on standardized data
X_std = StandardScaler().fit_transform(preds_wo_labels_df)
pca = PCA().fit(X_std)
import matplotlib.pyplot as plt
# scree plot: cumulative explained variance vs. number of components
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()
pca = PCA(n_components=21)
pca.fit_transform(preds_wo_labels_df)
print(pca.explained_variance_ratio_)
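'''
The choice of 21 components above is not explained in the gist; one hedged
way to pick the count programmatically is to keep enough components to
reach a target fraction of cumulative explained variance (the 0.90
threshold below is an assumption, not from the original script).
'''
cum_var = np.cumsum(PCA().fit(X_std).explained_variance_ratio_)
n_keep = int(np.searchsorted(cum_var, 0.90) + 1)
print('components needed for 90% cumulative variance:', n_keep)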
# let's do t-SNE (TSNE, StandardScaler, and PCA are already imported above)
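'''
TSNE is imported above but never actually applied in this gist. The sketch
below is one hedged example of how the standardized matrix X_std could be
embedded in 2-D; the perplexity value is an assumption (it must stay below
the number of samples, which is small here) and is not from the original.
'''
tsne = TSNE(n_components=2, perplexity=5, random_state=0)
X_tsne = tsne.fit_transform(X_std)
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=resp_np)
plt.title('t-SNE embedding colored by METFORMIN response')
plt.show()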
'''
Summarizing the PCA approach
Listed below are the 6 general steps for performing a principal component
analysis (a hedged sketch implementing them on this dataset follows the list):
1. Take the whole dataset consisting of d-dimensional samples, ignoring the class labels
2. Compute the d-dimensional mean vector (i.e., the means for every dimension of the whole dataset)
3. Compute the scatter matrix (alternatively, the covariance matrix) of the whole data set
4. Compute the eigenvectors (e_1, e_2, ..., e_d) and corresponding eigenvalues (lambda_1, lambda_2, ..., lambda_d)
5. Sort the eigenvectors by decreasing eigenvalues and choose the k eigenvectors with the largest eigenvalues to form a d x k dimensional matrix W (where every column represents an eigenvector)
6. Use this d x k eigenvector matrix to transform the samples onto the new subspace. This can be summarized by the equation y = W^T x (where x is a d x 1 vector representing one sample, and y is the transformed k x 1 sample in the new subspace)
'''
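'''
A minimal sketch of the six steps above, applied to the float matrix
preds_wo_labels (samples in rows, features in columns). Two assumptions:
the feature-space covariance (rowvar=False) is used here, unlike the
sample-space covariance computed earlier, and k = 2 is an illustrative
choice. With many features this direct eigendecomposition is expensive;
sklearn's PCA above is the practical route.
'''
X = preds_wo_labels                    # step 1: d-dimensional samples, labels ignored
mean_vec = X.mean(axis=0)              # step 2: d-dimensional mean vector
cov = np.cov(X, rowvar=False)          # step 3: covariance matrix of the features
vals, vecs = np.linalg.eig(cov)        # step 4: eigenvalues and eigenvectors
order = np.argsort(vals)[::-1]         # step 5: sort by decreasing eigenvalue
W = vecs[:, order[:2]]                 #         and keep the top k = 2 eigenvectors
Y = (X - mean_vec) @ W                 # step 6: project the samples, y = W^T x
Y.shape                                # (n_samples, 2)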
'''
let's plot the data
'''
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import FancyArrowPatch

class Arrow3D(FancyArrowPatch):
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0, 0), (0, 0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0], ys[0]), (xs[1], ys[1]))
        FancyArrowPatch.draw(self, renderer)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
plt.rcParams['legend.fontsize'] = 10
'''
separate the samples into class1 (response = 0) and class2 (response = 1);
the original gist never defined class1_sample/class2_sample, so they are
built here from a 3-component PCA projection, split on the 0/1 response
'''
proj3 = PCA(n_components=3).fit_transform(X_std)
class1_sample = proj3[resp_np == 0].T
class2_sample = proj3[resp_np == 1].T
ax.plot(class1_sample[0, :], class1_sample[1, :], class1_sample[2, :], 'o', markersize=8, color='blue', alpha=0.5, label='class1')
ax.plot(class2_sample[0, :], class2_sample[1, :], class2_sample[2, :], '^', markersize=8, alpha=0.5, color='red', label='class2')
plt.title('Samples for class 1 and class 2')
ax.legend(loc='upper right')
plt.show()