Skip to content

Instantly share code, notes, and snippets.

View monogenea's full-sized avatar

Francisco Lima monogenea

View GitHub Profile
# PCA-DA
mod2 <- train(class ~ ., data = arcene,
method = "lda",
metric = "Accuracy",
trControl = control,
preProc = c("zv","center","scale","pca"))
# RF
mod3 <- train(class ~ ., data = arcene,
method = "ranger",
# Compile models and compare performance
models <- resamples(list("PLS-DA" = mod1, "PCA-DA" = mod2, "RF" = mod3))
bwplot(models, metric = "Accuracy")
plot(varImp(mod1), 10, main = "PLS-DA")
plot(varImp(mod2), 10, main = "PCA-DA")
# Generate scaled 4*5 matrix with random std normal samples
set.seed(101)
mat <- scale(matrix(rnorm(20), 4, 5))
dimnames(mat) <- list(paste("Sample", 1:4), paste("Var", 1:5))
# Perform PCA
myPCA <- prcomp(mat, scale. = F, center = F)
myPCA$rotation # loadings
myPCA$x # scores
# Perform SVD
mySVD <- svd(mat)
mySVD # the diagonal of Sigma mySVD$d is given as a vector
sigma <- matrix(0,4,4) # we have 4 PCs, no need for a 5th column
diag(sigma) <- mySVD$d # sigma is now our true sigma matrix
# Compare PCA scores with the SVD's U*Sigma
theoreticalScores <- mySVD$u %*% sigma
all(round(myPCA$x,5) == round(theoreticalScores,5)) # TRUE
# Compare PCA loadings with the SVD's V
all(round(myPCA$rotation,5) == round(mySVD$v,5)) # TRUE
# Show that mat == U*Sigma*t(V)
recoverMatSVD <- theoreticalScores %*% t(mySVD$v)
all(round(mat,5) == round(recoverMatSVD,5)) # TRUE
wine <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
sep=",")
# Name the variables
colnames(wine) <- c("Cvs","Alcohol","Malic acid","Ash",
"Alcalinity of ash", "Magnesium",
"Total phenols", "Flavanoids",
"Nonflavanoid phenols", "Proanthocyanins",
"Color intensity", "Hue",
"OD280/OD315 of diluted wines", "Proline")
# The first column corresponds to the classes
wineClasses <- factor(wine$Cvs)
dev.off() # clear the format from the previous plot
winePCA <- prcomp(scale(wine[,-1]))
plot(winePCA$x[,1:2], col = wineClasses)
wineOutlier <- wine
wineOutlier[10,] <- wineOutlier[10,]*10 # change the 10th obs. into an extreme one by multiplying its profile by 10
outlierPCA <- prcomp(scale(wineOutlier[,-1]))
plot(outlierPCA$x[,1:2], col = wineClasses)