Last active
March 11, 2020 15:36
-
-
Save BioSciEconomist/1fdcda1fbabca9100a5f0ed8a08826a1 to your computer and use it in GitHub Desktop.
Basic factor analysis examples
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: ex factor analysis.R | |
# | DATE: 3/6/20 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: basic mechanics of factor analysis | |
# *---------------------------------------------------------------- | |
# rm(list=ls()) # get rid of any existing data | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
library(psych) | |
#--------------------------------- | |
# example 1 basic cluster analysis | |
#--------------------------------- | |
#Loading the dataset | |
bfi_data = bfi | |
# Remove rows with missing values and keep only complete cases | |
bfi_data=bfi_data[complete.cases(bfi_data),] | |
# write.csv(bfi_data,"bfi_data.csv") | |
# Create the correlation matrix from bfi_data | |
bfi_cor <- cor(bfi_data) | |
#Factor analysis of the data | |
factors_data <- fa(r = bfi_cor, nfactors = 6, scores = TRUE) | |
print(factors_data) | |
#--------------------------------- | |
# example 2 adding factor scores to data set | |
#--------------------------------- | |
df <- bfi[,1:24] #select variables to include in fa | |
fit <- fa(df, 2) #estimate model with 2 factors | |
fs <- factor.scores(df, fit) #obtain factor scores | |
fs <- fs$scores #get the columns of factor scores for each case | |
df <- cbind(df,fs) #append factor scores to dataset | |
#--------------------------------- | |
# example 3 adding factor scores to data set | |
#--------------------------------- | |
df <- bfi[,1:24] #select variables to include in fa | |
df <- df[complete.cases(df),] #get complete cases | |
df_cor <- cor(df) # compute correlation matrix | |
fit <- fa(r = df_cor, nfactors = 6, scores = TRUE) # factor analysis | |
fs <- factor.scores(df, fit) #obtain factor scores | |
fs <- fs$scores #get the columns of factor scores for each case | |
df <- cbind(df,fs) #append factor scores to dataset | |
#---------------------------------- | |
# example 4 rotation and plots | |
#-------------------------------- | |
# Maximum Likelihood Factor Analysis | |
# with varimax rotation | |
df <- bfi[,1:24] #select variables to include in fa | |
df <- df[complete.cases(df),] #get complete cases | |
fit <- factanal(df, 3, rotation="varimax") # perform fa | |
print(fit, digits=2, cutoff=.3, sort=TRUE) # print 'easy to read' loadings | |
# plot factor 1 by factor 2 | |
load <- fit$loadings[,1:2] | |
plot(load,type="n") # set up plot | |
text(load,labels=names(df),cex=.7) # add variable names | |
#-------------------------------- | |
# example 5 determining # of factors | |
#------------------------------ | |
library(nFactors) | |
df <- bfi[,1:24] #select variables to include in fa | |
df <- df[complete.cases(df),] #get complete cases | |
ev <- eigen(cor(df)) # get eigenvalues | |
ap <- parallel(subject=nrow(df),var=ncol(df), | |
rep=100,cent=.05) | |
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) | |
plotnScree(nS) | |
#--------------------------------- | |
# example 6 end to end analysis with clustering | |
#-------------------------------- | |
# rm(list=ls()) # get rid of any existing data | |
# step 1 determine number of factors in solution | |
library(nFactors) | |
df <- bfi[,1:24] #select variables to include in fa | |
df <- df[complete.cases(df),] #get complete cases | |
ev <- eigen(cor(df)) # get eigenvalues | |
ap <- parallel(subject=nrow(df),var=ncol(df), | |
rep=100,cent=.05) | |
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) | |
plotnScree(nS) | |
# from this result ~ 6 factors | |
# step 2 apply fa and add to dataset | |
df <- bfi[,1:24] #select variables to include in fa | |
df <- df[complete.cases(df),] #get complete cases | |
df_cor <- cor(df) # compute correlation matrix | |
fit <- fa(r = df_cor, nfactors = 6, scores = TRUE) # factor analysis | |
print(fit, digits=2, cutoff=.3, sort=TRUE) # print 'easy to read' loadings | |
fs <- factor.scores(df, fit) #obtain factor scores | |
fs <- fs$scores #get the columns of factor scores for each case | |
df <- cbind(df,fs) #append factor scores to dataset | |
# step 3 apply cluster analysis to the factors | |
tmp <- df[c("MR1","MR2","MR3","MR4","MR5","MR6")] | |
tmp <- scale(tmp) # standardize variables | |
# Determine number of clusters | |
wss <- (nrow(df)-1)*sum(apply(tmp,2,var)) | |
for (i in 2:15) wss[i] <- sum(kmeans(tmp, | |
centers=i,iter.max = 50)$withinss) | |
plot(1:15, wss, type="b", xlab="Number of Clusters", | |
ylab="Within groups sum of squares") | |
# based on plot determine # of clusters (~ 6) | |
# K-Means Cluster Analysis | |
fit <- kmeans(tmp, 5) # 5 cluster solution | |
# get cluster means | |
aggregate(tmp,by=list(fit$cluster),FUN=mean) | |
# append cluster assignment | |
df <- data.frame(df, fit$cluster) | |
#----------------------------------- | |
# calculate principal component loadings by hand | |
#---------------------------------- | |
library(psych) | |
#Loading the dataset | |
bfi_data = bfi | |
# Remove rows with missing values and keep only complete cases | |
bfi_data=bfi_data[complete.cases(bfi_data),] | |
# write.csv(bfi_data,"bfi_data.csv") | |
# Create the correlation matrix from bfi_data | |
bfi_cor <- cor(bfi_data) | |
# perform PCA | |
pc_bfi <- principal(bfi_cor, nfactors = 28, rotate = "none") | |
pc_bfi$loadings | |
# calculate eigenvectors and values 'by hand' | |
ev <- eigen(bfi_cor) # calculate eigenvalues and vectors | |
values <- ev$values # get eigenvalues | |
vectors <- ev$vectors # get eigenvectors | |
# loadings = vector values * sqrt(eigenvalues) | |
#ex: calculate loadings for 1st PC | |
(vectors[,1]*sqrt(values[1])) # 1st loading | |
pc_bfi$loadings # compare to results above from pca routine | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment