Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
explodecomputer / behaviour.r
Created March 10, 2017 22:37
genetic models of parent - offspring behaviour
n <- 10000
gm <- rbinom(n, 2, 0.5)
gp <- rbinom(n, 2, 0.5)
# approximate child genotypes
gc <- round((gm + gp) / 2)
cor(gm, gc)^2
cor(gp, gc)^2
@explodecomputer
explodecomputer / snp_to_kegg_go.r
Created March 13, 2017 11:30
SNP IDs to KEGG and GO terms
# source("https://bioconductor.org/biocLite.R")
# biocLite("KEGGREST")
# biocLite("org.Hs.eg.db")
library(biomaRt)
library(KEGGREST)
library(org.Hs.eg.db)
# example snp list
snp <- c("rs1007648", "rs112275031", "rs1124639", "rs11731570", "rs11743154", "rs12439909")
@explodecomputer
explodecomputer / ld.r
Created March 16, 2017 09:45
ld and imputation demo
n <- 1000
nsnp <- 10
r <- matrix(0.8, nsnp, nsnp)
diag(r) <- 1
x1 <- rmvnorm(n, rep(0, nsnp), sigma=r)
x2 <- rmvnorm(n, rep(0, nsnp), sigma=r)
x1[1:500, nsnp] <- x2[1:500, 1]
@explodecomputer
explodecomputer / example_plink.sh
Last active March 16, 2017 12:39
example plink
@explodecomputer
explodecomputer / make_residuals.R
Created March 27, 2017 12:13
make phenotype residuals from covariates
library(data.table)
arguments <- commandArgs(T)
phenfile <- arguments[1]
covfile <- arguments[2]
outfile <- arguments[3]
lmodel <- arguments[4]
# expect lmodel to be "logistic" or "linear"
@explodecomputer
explodecomputer / survival_bias.r
Created April 6, 2017 10:21
alternative survival bias
library(dplyr)
# X causes DEATH
# P causes DEATH
# AGE causes DEATH
# G causes X
# AGE causes Y
# - IF cases and controls are AGE matched then NO SURVIVAL BIAS
n <- 1000000
@explodecomputer
explodecomputer / ios_aom.R
Created April 11, 2017 12:00
Age of menarche index of suspicion
library(TwoSampleMR)
library(MRInstruments)
library(ggplot2)
library(dplyr)
library(reshape2)
library(classInt)
calculate_ios <- function(exposure_dat, outcome_dat)
{
@explodecomputer
explodecomputer / mv_mr_example.r
Created April 28, 2017 10:55
multivariable mr example
# Lipids (GLGC) against CHD
id_exposure <- c(299, 300, 302)
id_outcome <- 7
# Get exposures
exposure_dat <- mv_extract_exposures(id_exposure)
# Chris - if you have external data then is it possible to create the same format as this?
# - it's in long format
# - each SNP appears as many times as there are exposures
@explodecomputer
explodecomputer / assortativemating.
Last active May 4, 2017 15:28
assortative mating
# Sample size
nid <- 2000
# Number of SNPs
nsnp <- 1000
# Variance explained by SNPs
varexp <- 0.8
# Effect sizes of SNPs
# Get the GLGC data
system("wget http://csg.sph.umich.edu/abecasis/public/lipids2013/jointGwasMc_HDL.txt.gz")
# Get 1000 genomes frequencies
system("plink1.90 --bfile /panfs/panasas01/shared/alspac/studies/latest/alspac/genetic/variants/arrays/gwas/imputed/1000genomes/released/2015-10-30/data/derived/filtered/bestguess/maf0.01_info0.8/data_chr01 --freq --out chr1")
# Predict effect sizes and standard errors