Created
January 12, 2022 21:29
-
-
Save explodecomputer/76cabfe24073ebe44320467bdd308427 to your computer and use it in GitHub Desktop.
Reading in data to TwoSampleMR options
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Option 1: MR-Base (i.e. using the cloud) ----
library(TwoSampleMR)

# Outcome dataset IDs.
# NOTE(review): "alz_id" below (and possibly these IDs) look like placeholders;
# substitute the real dataset IDs before running.
metab_ids <- c("met-e-1", "met-e-2")

# Extract data - from API
# The exposure instruments are pulled first; their SNP column is then used to
# look up the same variants in each outcome dataset.
exp_dat <- extract_instruments("alz_id")
out_dat <- extract_outcome_data(exp_dat$SNP, metab_ids)

# Harmonise and run
dat <- harmonise_data(exp_dat, out_dat)
mr(dat)
# Option 2: running locally / on HPC (i.e. using GWAS VCF files on bc4) ----
library(gwasglue)
library(gwasvcf)
library(TwoSampleMR)

# Set default locations for bcftools, plink and LD reference files etc. for
# use on bc4
set_bcftools()
set_plink()
set_bc4_files()

# Extract data from the VCFs using this helper function; see documentation at
# https://mrcieu.github.io/gwasglue/reference/make_TwoSampleMR_dat.html
# `metab_ids` is the vector of outcome IDs defined in the MR-Base section.
dat <- make_TwoSampleMR_dat(id1 = "alz_id", id2 = metab_ids)

# Run as usual
mr(dat)
# Option 3: running locally / on HPC (but using unprocessed .txt.gz files) ----
# You need to extract the SNPs you're interested in, either in R using this
# function:
# https://mrcieu.github.io/TwoSampleMR/reference/read_outcome_data.html
# Also provide arguments so it knows which columns to read beta, se etc. from.
exp_dat <- extract_instruments("alz_id")
out_dat1 <- read_outcome_data(
  filename = "path/to/file1.txt.gz",
  snps = exp_dat$SNP
)

# You can loop across all files by putting this in a for loop or similar.
# It'll be quite slow though.
# Alternatively, in bash, do this sort of thing:
## Make a snplist.txt that lists all the SNPs you want from the outcome metab
## datasets
# Create dir for outputs
mkdir -p /path/to/extract

# Create filelist.txt that lists all the filenames (one per line)

# This loop goes dataset by dataset, extracting the SNPs you want
while IFS= read -r id
do
    # Each line of filelist.txt is read into the variable ${id}.
    # snplist.txt supplies the patterns (-f); -w restricts matches to whole words.
    # Paths are quoted so filenames containing spaces or glob characters survive.
    # NOTE(review): only matching lines are kept, so a header row in the input
    # will presumably be dropped from the extract - confirm before reading back.
    zgrep -wf snplist.txt "/path/to/gzipfiles/${id}" | gzip -c > "/path/to/extract/${id}"
done < filelist.txt
# Back in R, read all the extracted files in and convert them to TwoSampleMR format using format_data()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment