Skip to content

Instantly share code, notes, and snippets.

View monogenea's full-sized avatar

Francisco Lima monogenea

View GitHub Profile
# Load the housing data set from the UCI repository. The file is read without
# a header row, and "?" entries are treated as missing values.
houses <- read.table(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
  header = FALSE, na.strings = "?"
)
colnames(houses) <- c(
  "CRIM", "ZN", "INDUS", "CHAS",
  "NOX", "RM", "AGE", "DIS", "RAD",
  "TAX", "PTRATIO", "B", "LSTAT", "MEDV"
)
# Perform PCA on the standardized columns; column 14 (MEDV) is left out
pcaHouses <- prcomp(scale(houses[, -14]))
# Keep the PC scores (the data projected onto the principal components)
scoresHouses <- pcaHouses$x
# Show the internal structure of the PCA result; its slots are marked with @
str(object = winePCAmethods)
# Print the contents of the R2 slot
slot(winePCAmethods, "R2")
# PCA on the wine measurements (column 1 is dropped), keeping 2 components.
# NOTE(review): pca() and slplot() are presumably from pcaMethods - confirm
# that the package is loaded before this snippet runs.
winePCAmethods <- pca(wine[, -1],
  scale = "uv", center = TRUE,
  nPcs = 2, method = "svd"
)
# Draw both the scores and the loadings plot; scores are coloured by class
slplot(winePCAmethods,
  scoresLoadings = c(TRUE, TRUE),
  scol = wineClasses
)
# Install BiocManager and pcaMethods only when they are missing, so that
# re-running the script does not trigger a reinstall every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("pcaMethods", quietly = TRUE)) {
  BiocManager::install("pcaMethods")
}
library(pcaMethods)
wineOutlier <- wine
# Change the 10th obs. into an extreme one by multiplying its profile by 10.
# Column 1 is excluded from the scaling: it is dropped from the PCA below, and
# multiplying it as well would only corrupt the value stored there.
wineOutlier[10, -1] <- wineOutlier[10, -1] * 10
outlierPCA <- prcomp(scale(wineOutlier[, -1]))
# Scores on the first two PCs, coloured by wine class
plot(outlierPCA$x[, 1:2], col = wineClasses)
# Clear the format from the previous plot. dev.off() raises an error when only
# the null device (device 1) is active, so close a device only if one is open.
if (dev.cur() > 1L) {
  dev.off()
}
# PCA on the standardized wine measurements (column 1 is dropped)
winePCA <- prcomp(scale(wine[, -1]))
# Scores on the first two PCs, coloured by wine class
plot(winePCA$x[, 1:2], col = wineClasses)
# Label the 14 columns of the wine data frame
names(wine) <- c(
  "Cvs",
  "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium",
  "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins",
  "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline"
)
# Column 1 (Cvs) holds the classes; store them as a factor
wineClasses <- factor(wine[["Cvs"]])
# Read the comma-separated wine data from the UCI repository
wine <- read.table(
  file = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
  sep = ","
)
# Compare PCA scores with the SVD's U*Sigma.
# all.equal() compares within a numeric tolerance instead of testing rounded
# floats with ==, which can fail spuriously at a rounding boundary. Attributes
# are ignored so that differing dimnames do not count as a mismatch.
theoreticalScores <- mySVD$u %*% sigma
isTRUE(all.equal(myPCA$x, theoreticalScores, check.attributes = FALSE)) # TRUE
# Compare PCA loadings with the SVD's V
isTRUE(all.equal(myPCA$rotation, mySVD$v, check.attributes = FALSE)) # TRUE
# Show that mat == U*Sigma*t(V)
recoverMatSVD <- theoreticalScores %*% t(mySVD$v)
# as.matrix() keeps the comparison matrix-to-matrix whatever class mat has
isTRUE(all.equal(as.matrix(mat), recoverMatSVD, check.attributes = FALSE)) # TRUE
# Perform SVD
mySVD <- svd(mat)
mySVD # the diagonal of Sigma mySVD$d is given as a vector
# Build the true (square) Sigma matrix with the singular values on its
# diagonal. Its size follows length(mySVD$d) rather than a hard-coded 4, so
# there is one row/column per PC. nrow is passed explicitly because diag() on
# a length-one vector alone would build an identity matrix instead.
sigma <- diag(mySVD$d, nrow = length(mySVD$d))