Francisco Lima monogenea

Data scientist and blogger

monogenea / 1-poissonPCA.R

Created October 7, 2019 19:02

	# Simulate a 4 x 5 matrix of standard-normal draws and standardise every
	# column with scale(); the fixed seed makes the draws reproducible
	set.seed(101)
	mat <- scale(matrix(rnorm(20), nrow = 4, ncol = 5))
	dimnames(mat) <- list(paste("Sample", 1:4), paste("Var", 1:5))

	# Perform PCA. The columns were already standardised by scale() above, so
	# prcomp() is told not to centre or scale again. TRUE/FALSE are spelled out
	# because T and F are ordinary variables that can be reassigned.
	myPCA <- prcomp(mat, scale. = FALSE, center = FALSE)
	myPCA$rotation # loadings
	myPCA$x # scores

monogenea / 5-poissonPLS.R

Created October 7, 2019 19:01

	# Variable-importance plots for the two fitted models, each limited to the
	# ten top-ranked predictors. The calls stay at top level so the returned
	# plot objects are auto-printed.
	plot(varImp(mod1), top = 10, main = "PLS-DA")
	plot(varImp(mod2), top = 10, main = "PCA-DA")

monogenea / 4-poissonPLS.R

Created October 7, 2019 19:00

	# Gather the resampling results of the three fitted models under their
	# display labels, then compare them on accuracy in a box-and-whisker plot
	models <- resamples(
	  list(
	    "PLS-DA" = mod1,
	    "PCA-DA" = mod2,
	    "RF" = mod3
	  )
	)
	bwplot(models, metric = "Accuracy")

monogenea / 3-poissonPLS.R

Created October 7, 2019 18:59

monogenea / 2-poissonPLS.R

Created October 7, 2019 18:57

	# Cross-validation design: seed the RNG, draw the resampling indices from
	# the class labels (k = 5, times = 10) and hand them to trainControl(),
	# which picks the final tuning value with the "oneSE" selection rule
	set.seed(100)
	myfolds <- createMultiFolds(y = arcene$class, k = 5, times = 10)
	control <- trainControl(
	  method = "repeatedcv",
	  index = myfolds,
	  selectionFunction = "oneSE"
	)

	# Train PLS model
	# The formula `class ~ .` uses every other column of `arcene` as a predictor;
	# candidates are compared on "Accuracy" with tuneLength = 20.
	# NOTE(review): this excerpt ends inside the train() call -- the remaining
	# arguments and the closing parenthesis are not visible here.
	mod1 <- train(class ~ ., data = arcene,
	method = "pls",
	metric = "Accuracy",
	tuneLength = 20,

monogenea / 1-poissonPLS.R

Created October 7, 2019 18:56

	# Attach caret (this does not install it; the package must be available)
	library(caret)

	# Download the ARCENE training matrix: 10000 space-separated numeric
	# columns; the "NULL" column class drops the 10001st field of each row
	arcene <- read.table(
	  file = "http://archive.ics.uci.edu/ml/machine-learning-databases/arcene/ARCENE/arcene_train.data",
	  sep = " ",
	  colClasses = c(rep("numeric", 10000), "NULL")
	)

	# Append the training labels, read from a separate file, as a factor column
	arcene[["class"]] <- factor(
	  scan(
	    file = "https://archive.ics.uci.edu/ml/machine-learning-databases/arcene/ARCENE/arcene_train.labels",
	    sep = "\t"
	  )
	)

monogenea / 9-poissonGWAS.R

Created October 7, 2019 18:53

	# QQ plot using GenABEL estlambda function
	# The squared t statistics are passed to estlambda() with plotting enabled
	# while the PNG device "<target>_QQplot.png" is open; the function's result
	# is kept in `lambda`. TRUE is spelled out because T can be reassigned.
	png(paste0(target, "_QQplot.png"), width = 500, height = 500)
	lambda <- estlambda(GWASout$t.value^2, plot = TRUE, method = "median")
	dev.off()

monogenea / 8-poissonGWAS.R

Created October 7, 2019 18:52

	# Manhattan plot
	GWASout <- read.table(paste(target, ".txt", sep = ""), header = T, colClasses = c("character", rep("numeric",4)))
	GWASout$type <- rep("typed", nrow(GWASout))
	GWASout$Neg_logP <- -log10(GWASout$p.value)
	GWASout <- merge(GWASout, genData$MAP[,c("SNP", "chr", "position")])
	GWASout <- GWASout[order(GWASout$Neg_logP, decreasing = T),]

	png(paste(target, ".png", sep = ""), height = 500,width = 1000)
	GWAS_Manhattan(GWASout)
	dev.off()

monogenea / 7-poissonGWAS.R

Created October 7, 2019 18:51

	# Choose trait for association analysis, use colnames(genData$LIP) for listing
	# NOTE: Ignore the first column of genData$LIP (gender)
	target <- "Cholesterol"

	# Phenotype table: ids taken from the row names of genData$LIP, paired with
	# the scale()-standardised values of the chosen trait. FALSE is spelled out
	# because F is an ordinary variable that can be reassigned.
	phenodata <- data.frame(
	  "id" = rownames(genData$LIP),
	  "phenotype" = scale(genData$LIP[, target]),
	  stringsAsFactors = FALSE
	)

	# Conduct GWAS (will take a while); GWAA() is given "<target>.txt" as filename
	start <- Sys.time() # timestamp taken immediately before the run
	GWAA(
	  genodata = genData$SNP,
	  phenodata = phenodata,
	  filename = paste0(target, ".txt")
	)

monogenea / 6-poissonGWAS.R

Created October 7, 2019 18:50

	# PCA
	# Seeded, single-threaded snpgdsPCA() run restricted to the given sample
	# and SNP id sets
	set.seed(100)
	pca <- snpgdsPCA(
	  genofile,
	  sample.id = geno.sample.ids,
	  snp.id = snpset.ibd,
	  num.thread = 1
	)
	# Keep the sample ids together with the first two eigenvector columns.
	# FALSE is spelled out because F is an ordinary variable that can be
	# reassigned.
	pctab <- data.frame(
	  sample.id = pca$sample.id,
	  PC1 = pca$eigenvect[, 1],
	  PC2 = pca$eigenvect[, 2],
	  stringsAsFactors = FALSE
	)

	# Subset and/or reorder origin accordingly