This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load lipid datasets & match SNP-Lipidomics samples | |
lipidsMalay <- read.delim("public/Lipidomic/117Malay_282lipids.txt", row.names = 1) | |
lipidsIndian <- read.delim("public/Lipidomic/120Indian_282lipids.txt", row.names = 1) | |
lipidsChinese <- read.delim("public/Lipidomic/122Chinese_282lipids.txt", row.names = 1) | |
all(Reduce(intersect, list(colnames(lipidsMalay), | |
colnames(lipidsIndian), | |
colnames(lipidsChinese))) == colnames(lipidsMalay)) # TRUE | |
lip <- rbind(lipidsMalay, lipidsIndian, lipidsChinese) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(snpStats) | |
library(doParallel) | |
library(SNPRelate) | |
library(GenABEL) | |
library(dplyr) | |
source("GWASfunction.R") | |
load("PhenoGenoMap.RData") | |
# Use SNP call rate of 100%, MAF of 0.1 (very stringent) | |
maf <- 0.1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample call rate & heterozygosity | |
callMat <- !is.na(genData$SNP) | |
Sampstats <- row.summary(genData$SNP) | |
hetExp <- callMat %*% (2 * SNPstats$MAF * (1 - SNPstats$MAF)) # Hardy-Weinberg heterozygosity (expected) | |
hetObs <- with(Sampstats, Heterozygosity * (ncol(genData$SNP)) * Call.rate) | |
Sampstats$hetF <- 1-(hetObs/hetExp) | |
# Use sample call rate of 100%, het threshold of 0.1 (very stringent) | |
het <- 0.1 # Set cutoff for inbreeding coefficient; | |
het_call <- with(Sampstats, abs(hetF) < het & Call.rate == 1) | |
genData$SNP <- genData$SNP[het_call,] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LD and kinship coeff | |
ld <- .2 | |
kin <- .1 | |
snpgdsBED2GDS(bed.fn = "convertGDS.bed", bim.fn = "convertGDS.bim", | |
fam.fn = "convertGDS.fam", out.gdsfn = "myGDS", | |
cvt.chr = "char") | |
genofile <- snpgdsOpen("myGDS", readonly = F) | |
gds.ids <- read.gdsn(index.gdsn(genofile, "sample.id")) | |
gds.ids <- sub("-1", "", gds.ids) | |
add.gdsn(genofile, "sample.id", gds.ids, replace = T) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PCA | |
set.seed(100) | |
pca <- snpgdsPCA(genofile, sample.id = geno.sample.ids, | |
snp.id = snpset.ibd, num.thread = 1) | |
pctab <- data.frame(sample.id = pca$sample.id, | |
PC1 = pca$eigenvect[,1], | |
PC2 = pca$eigenvect[,2], | |
stringsAsFactors = F) | |
# Subset and/or reorder origin accordingly |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Choose trait for association analysis, use colnames(genData$LIP) for listing | |
# NOTE: Ignore the first column of genData$LIP (gender) | |
target <- "Cholesterol" | |
phenodata <- data.frame("id" = rownames(genData$LIP), | |
"phenotype" = scale(genData$LIP[,target]), stringsAsFactors = F) | |
# Conduct GWAS (will take a while) | |
start <- Sys.time() | |
GWAA(genodata = genData$SNP, phenodata = phenodata, filename = paste(target, ".txt", sep = "")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manhattan plot | |
GWASout <- read.table(paste(target, ".txt", sep = ""), header = T, colClasses = c("character", rep("numeric",4))) | |
GWASout$type <- rep("typed", nrow(GWASout)) | |
GWASout$Neg_logP <- -log10(GWASout$p.value) | |
GWASout <- merge(GWASout, genData$MAP[,c("SNP", "chr", "position")]) | |
GWASout <- GWASout[order(GWASout$Neg_logP, decreasing = T),] | |
png(paste(target, ".png", sep = ""), height = 500,width = 1000) | |
GWAS_Manhattan(GWASout) | |
dev.off() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# QQ plot using GenABEL estlambda function | |
png(paste(target, "_QQplot.png", sep = ""), width = 500, height = 500) | |
lambda <- estlambda(GWASout$t.value**2, plot = T, method = "median") | |
dev.off() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load caret, install if necessary | |
library(caret) | |
arcene <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/arcene/ARCENE/arcene_train.data", | |
sep = " ", | |
colClasses = c(rep("numeric", 10000), "NULL")) | |
# Add the labels as an additional column | |
arcene$class <- factor(scan("https://archive.ics.uci.edu/ml/machine-learning-databases/arcene/ARCENE/arcene_train.labels", | |
sep = "\t")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile cross-validation settings | |
set.seed(100) | |
myfolds <- createMultiFolds(arcene$class, k = 5, times = 10) | |
control <- trainControl("repeatedcv", index = myfolds, selectionFunction = "oneSE") | |
# Train PLS model | |
mod1 <- train(class ~ ., data = arcene, | |
method = "pls", | |
metric = "Accuracy", | |
tuneLength = 20, |