Francisco Lima monogenea

Data scientist and blogger

monogenea / 5-poissonGWAS.R

Created October 7, 2019 18:49

	# LD and kinship coeff
	# Thresholds are only defined here; presumably they are consumed by later
	# LD-pruning / relatedness steps that are not part of this snippet (confirm).
	ld <- .2
	kin <- .1
	# Convert the PLINK .bed/.bim/.fam trio into a GDS file named "myGDS".
	snpgdsBED2GDS(bed.fn = "convertGDS.bed", bim.fn = "convertGDS.bim",
	              fam.fn = "convertGDS.fam", out.gdsfn = "myGDS",
	              cvt.chr = "char")
	# Open the GDS file writable (readonly = FALSE) because the sample IDs are
	# rewritten below. TRUE/FALSE are spelled out: T and F are ordinary
	# variables that can be reassigned.
	genofile <- snpgdsOpen("myGDS", readonly = FALSE)
	gds.ids <- read.gdsn(index.gdsn(genofile, "sample.id"))
	# Remove the first occurrence of "-1" from each sample ID, then overwrite
	# the "sample.id" node with the cleaned IDs.
	gds.ids <- sub("-1", "", gds.ids)
	add.gdsn(genofile, "sample.id", gds.ids, replace = TRUE)

monogenea / 4-poissonGWAS.R

Created October 7, 2019 18:48

	# Sample call rate & heterozygosity
	# genData and SNPstats are defined outside this snippet.
	# callMat is TRUE wherever a genotype was called (not missing).
	callMat <- !is.na(genData$SNP)
	Sampstats <- row.summary(genData$SNP)
	# Hardy-Weinberg heterozygosity (expected): for each sample, sum 2p(1 - p)
	# over the SNPs that were called for that sample (matrix product of the
	# call indicator with the per-SNP expected heterozygosity).
	hetExp <- callMat %*% (2 * SNPstats$MAF * (1 - SNPstats$MAF))
	# Observed heterozygous count per sample; assumes Heterozygosity is the
	# proportion among called genotypes - confirm against row.summary().
	hetObs <- with(Sampstats, Heterozygosity * (ncol(genData$SNP)) * Call.rate)
	Sampstats$hetF <- 1 - (hetObs / hetExp)
	# Use sample call rate of 100%, het threshold of 0.1 (very stringent)
	het <- 0.1 # Set cutoff for inbreeding coefficient
	het_call <- with(Sampstats, abs(hetF) < het & Call.rate == 1)
	# Keep only the samples that pass both filters.
	genData$SNP <- genData$SNP[het_call, ]

monogenea / 3-poissonGWAS.R

Created October 7, 2019 18:48

	library(snpStats)
	library(doParallel)
	library(SNPRelate)
	library(GenABEL)
	library(dplyr)
	# Run GWASfunction.R (its contents are not shown in this snippet) and then
	# restore the objects saved in PhenoGenoMap.RData into the workspace.
	source("GWASfunction.R")
	load("PhenoGenoMap.RData")

	# Use SNP call rate of 100%, MAF of 0.1 (very stringent)
	# Only the MAF cutoff is set here; the filtering itself is not part of
	# this snippet.
	maf <- 0.1

monogenea / 2-poissonGWAS.R

Last active December 2, 2019 14:22

	# Load lipid datasets & match SNP-Lipidomics samples
	# Each file is tab-delimited; its first column becomes the row names.
	lipidsMalay <- read.delim("public/Lipidomic/117Malay_282lipids.txt", row.names = 1)
	lipidsIndian <- read.delim("public/Lipidomic/120Indian_282lipids.txt", row.names = 1)
	lipidsChinese <- read.delim("public/Lipidomic/122Chinese_282lipids.txt", row.names = 1)

	# rbind() on data frames matches columns by name, so all three tables must
	# carry the same set of column names. Enforce this instead of merely
	# printing the result, and avoid `==`, which recycles silently when the
	# lengths differ.
	stopifnot(
		setequal(colnames(lipidsMalay), colnames(lipidsIndian)),
		setequal(colnames(lipidsMalay), colnames(lipidsChinese))
	)
	# Stack the three tables row-wise into a single data frame.
	lip <- rbind(lipidsMalay, lipidsIndian, lipidsChinese)

monogenea / 1-poissonGWAS.R

Last active December 2, 2019 14:21

	library(snpStats)
	# Restore the objects saved in conversionTable.RData into the workspace.
	load("conversionTable.RData")

	# Each PLINK dataset is a .bed/.bim/.fam trio sharing one path prefix;
	# paste0() appends the three extensions to the prefix, and read.plink()
	# receives them in bed, bim, fam order.
	pathM <- paste0("public/Genomics/108Malay_2527458snps", c(".bed", ".bim", ".fam"))
	SNP_M <- read.plink(pathM[1], pathM[2], pathM[3])

	pathI <- paste0("public/Genomics/105Indian_2527458snps", c(".bed", ".bim", ".fam"))
	SNP_I <- read.plink(pathI[1], pathI[2], pathI[3])

	# Only the paths are built for this dataset in the visible snippet.
	pathC <- paste0("public/Genomics/110Chinese_2527458snps", c(".bed", ".bim", ".fam"))

monogenea / 15-poissonLMM.R

Created October 7, 2019 18:39

# Draw the default plot for finalModel (defined outside this snippet); which
# plot appears depends on the plot() method for the model's class.
plot(finalModel)

monogenea / 14-poissonLMM.R

Last active October 7, 2019 18:38

	# Reset previous graphical pars by closing the active graphics device.
	# Skipped when only the null device (1) is current, because dev.off()
	# raises an error in that case.
	if (dev.cur() > 1) dev.off()
	# New GLM, updated from the first by estimating with REML
	# (GLM is defined outside this snippet; `. ~ .` keeps its formula).
	GLM2 <- update(GLM, . ~ ., method = "REML")
	betas <- coef(GLM2)
	# Column 2 of the summary coefficient table, used here as the standard
	# errors - confirm for the model class in use.
	stdErrors <- coef(summary(GLM2))[, 2]
	# One x position per coefficient, derived from the coefficient vector
	# rather than hard-coded as 1:6.
	xPos <- seq_along(betas)
	# Plot side by side, beta with respective SEs
	plot(betas, xlab = "Fixed Effects", ylab = expression(beta), axes = FALSE,
	     pch = 16, col = "black", ylim = c(-.9, 2.2))
	# Vertical bars spanning beta - SE to beta + SE.
	segments(x0 = xPos, x1 = xPos, y0 = betas - stdErrors, y1 = betas + stdErrors,
	         col = "black")
	# Axes were suppressed in plot(); draw the y axis explicitly.
	axis(2)

monogenea / 13-poissonLMM.R

Created October 7, 2019 18:37

	# Refit lmm6.2 (defined outside this snippet) with its formula unchanged
	# (`.~.`) and method = "REML", then print the summary of the refit.
	finalModel <- update(lmm6.2, .~., method = "REML")
	summary(finalModel)

monogenea / 12-poissonLMM.R

Created October 7, 2019 18:37

monogenea / 11-poissonLMM.R

Created October 7, 2019 18:36

# Print the model summary for lmm6.2, which is defined outside this snippet.
summary(lmm6.2)