explodecomputer · January 12, 2022 21:29
diff --git a/examples.r b/examples.r
 # MR-Base (i.e. using the cloud)
 # Everything in this section is fetched through the API.

 library(TwoSampleMR)

 # IDs of the metabolite outcome datasets
 metab_ids <- c("met-e-1", "met-e-2")

 # Instruments for the exposure, then the same SNPs looked up in each outcome
 exp_dat <- extract_instruments(outcomes = "alz_id")
 out_dat <- extract_outcome_data(snps = exp_dat[["SNP"]], outcomes = metab_ids)

 # Harmonise exposure and outcome data, then run MR on the result
 dat <- harmonise_data(exposure_dat = exp_dat, outcome_dat = out_dat)
 mr(dat)


 # Running locally / on HPC (i.e. using GWAS VCF files on bc4)

 library(gwasglue)
 library(gwasvcf)
 library(TwoSampleMR)

 # Point the packages at the default bc4 locations of bcftools, plink,
 # the ld reference files etc
 set_bcftools()
 set_plink()
 set_bc4_files()

 # Helper that extracts the data from the vcfs; documented at
 # https://mrcieu.github.io/gwasglue/reference/make_TwoSampleMR_dat.html
 dat <- make_TwoSampleMR_dat(id1 = "alz_id", id2 = metab_ids)

 # From here on the analysis is run as usual
 mr(dat)



 # Running locally / on HPC (but using unprocessed txt.gzip files)

 # Option 1: extract the SNPs you're interested in from within R, using
 # https://mrcieu.github.io/TwoSampleMR/reference/read_outcome_data.html
 # You also need to pass arguments telling it which columns hold beta, se etc
 exp_dat <- extract_instruments(outcomes = "alz_id")
 out_dat1 <- read_outcome_data(
   snps = exp_dat[["SNP"]],
   filename = "path/to/file1.txt.gz"
 )

 # To cover all files, wrap the call above in a for loop or similar. It'll be quite slow though.

 # Alternatively, do the extraction in bash (run the commands below in a shell, not in R):

 ## make a snplist.txt that lists all the SNPs you want from the outcome metab datasets

 # Create dir for outputs
 mkdir -p /path/to/extract

 # Create filelist.txt that lists all the filenames (one per line)

 # This is a loop that will go dataset by dataset extracting the SNPs you want
 while IFS= read -r id
 do
 	# Each line of filelist.txt is read into ${id}.
 	# zgrep keeps only lines with a whole-word match (-w) to a pattern from
 	# snplist.txt (-f); the result is re-compressed under the same file name.
 	# The expansions are quoted so a name containing spaces or glob characters
 	# is passed as a single argument.
 	# NOTE(review): lines matching no SNP are dropped, which presumably includes
 	# any header row - confirm the column layout before reading the output in R.
 	zgrep -wf snplist.txt "/path/to/gzipfiles/${id}" | gzip -c > "/path/to/extract/${id}"
 done < filelist.txt


 # Back in R, read all the extracted files in and convert them to TwoSampleMR format using format_data()
	# MR-Base (i.e. using the cloud)
	# Everything in this section is fetched through the API.

	library(TwoSampleMR)

	# IDs of the metabolite outcome datasets
	metab_ids <- c("met-e-1", "met-e-2")

	# Instruments for the exposure, then the same SNPs looked up in each outcome
	exp_dat <- extract_instruments(outcomes = "alz_id")
	out_dat <- extract_outcome_data(snps = exp_dat[["SNP"]], outcomes = metab_ids)

	# Harmonise exposure and outcome data, then run MR on the result
	dat <- harmonise_data(exposure_dat = exp_dat, outcome_dat = out_dat)
	mr(dat)


	# Running locally / on HPC (i.e. using GWAS VCF files on bc4)

	library(gwasglue)
	library(gwasvcf)
	library(TwoSampleMR)

	# Point the packages at the default bc4 locations of bcftools, plink,
	# the ld reference files etc
	set_bcftools()
	set_plink()
	set_bc4_files()

	# Helper that extracts the data from the vcfs; documented at
	# https://mrcieu.github.io/gwasglue/reference/make_TwoSampleMR_dat.html
	dat <- make_TwoSampleMR_dat(id1 = "alz_id", id2 = metab_ids)

	# From here on the analysis is run as usual
	mr(dat)



	# Running locally / on HPC (but using unprocessed txt.gzip files)

	# Option 1: extract the SNPs you're interested in from within R, using
	# https://mrcieu.github.io/TwoSampleMR/reference/read_outcome_data.html
	# You also need to pass arguments telling it which columns hold beta, se etc
	exp_dat <- extract_instruments(outcomes = "alz_id")
	out_dat1 <- read_outcome_data(
	  snps = exp_dat[["SNP"]],
	  filename = "path/to/file1.txt.gz"
	)

	# To cover all files, wrap the call above in a for loop or similar. It'll be quite slow though.

	# Alternatively, do the extraction in bash (run the commands below in a shell, not in R):

	## make a snplist.txt that lists all the SNPs you want from the outcome metab datasets

	# Create dir for outputs
	mkdir -p /path/to/extract

	# Create filelist.txt that lists all the filenames (one per line)

	# This is a loop that will go dataset by dataset extracting the SNPs you want
	while IFS= read -r id
	do
		# Each line of filelist.txt is read into ${id}.
		# zgrep keeps only lines with a whole-word match (-w) to a pattern from
		# snplist.txt (-f); the result is piped to gzip and written under the
		# same file name. The pipe must be a bare `|`: an escaped `\|` would be
		# handed to zgrep as a literal argument and gzip would never run.
		# The expansions are quoted so a name containing spaces or glob characters
		# is passed as a single argument.
		# NOTE(review): lines matching no SNP are dropped, which presumably includes
		# any header row - confirm the column layout before reading the output in R.
		zgrep -wf snplist.txt "/path/to/gzipfiles/${id}" | gzip -c > "/path/to/extract/${id}"
	done < filelist.txt


	# Back in R, read all the extracted files in and convert them to TwoSampleMR format using format_data()