This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE IF EXISTS union_nomismatch; | |
CREATE TABLE union_nomismatch | |
SELECT "12" as chrom, pos18, pos19, if(alleles_study IS NOT NULL, alleles_study, alleles_1000g) as alleles, maf1000g, maxmaf, meanmaf, novel, offtarget, thresh, highqual, superhighqual, | |
CASE | |
WHEN meanmaf IS NULL THEN maf1000g | |
WHEN maf1000g IS NULL THEN meanmaf | |
WHEN (meanmaf IS NOT NULL and maf1000g IS NOT NULL) THEN GREATEST(meanmaf, maf1000g) | |
END AS bestmaf | |
FROM ( | |
SELECT |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load the BSgenome package and download the hg19 reference sequence | |
## http://www.bioconductor.org/packages/release/bioc/html/BSgenome.html | |
# source("http://www.bioconductor.org/biocLite.R") | |
# biocLite("BSgenome") | |
# biocLite("BSgenome.Hsapiens.UCSC.hg19") | |
library('BSgenome.Hsapiens.UCSC.hg19') | |
installed.genomes() | |
getflank <- function(position, alleles="[N/N]", chr="chr12", offset=10) { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#--------------- | |
# Sample overlap | |
#--------------- | |
# Samples where in_gwas=1. n=2503 | |
SELECT count(*) FROM igf1.sample_aapc_gwas where in_gwas=1; | |
# Samples where in_gwas=1 and that are actually in the GWAS data. n=2130. | |
SELECT count(*) FROM igf1.sample_aapc_gwas a JOIN mec.sample_genotyped b on a.m_id=b.m_id where in_gwas=1; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stephen Turner | |
# http://StephenTurner.us/ | |
# http://GettingGeneticsDone.blogspot.com/ | |
# See license at http://gettinggeneticsdone.blogspot.com/p/copyright.html | |
# Last updated: Tuesday, April 19, 2011 | |
# R code for making manhattan plots and QQ plots from plink output files. | |
# manhattan() with GWAS data this can take a lot of memory, recommended for use on 64bit machines only, for now. | |
# Altnernatively, use bmanhattan() , i.e., base manhattan. uses base graphics. way faster. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------------------------- | |
# If you split the dataset (3195094 snps) into 320 files of 10,000 snps | |
# (the last file will have 5094 snps), then you can use this code to generate | |
# a "map" that will map the original column numbers (1:3195094) to a file number | |
# (1:1000) and a column number (1:10000). | |
#------------------------------------------------------------------------------------- | |
# nfiles <- 320 | |
# ncolsperfile <- 10000 | |
# ncols <- nfiles*ncolsperfile | |
# column <- 1:ncols |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#----------------------------------------------------------------------------------------------- | |
# | |
# Calculate PCs when the number of SNPs (m>4000) is large | |
# From G with SNPs on the columns, keep the first 20 PCs | |
# X = normed G, we want PCs (=Xv) where v is eigenvector of X'X (the covariance matrix) | |
# X'X can't be handled directly. | |
# INSTEAD, | |
# If XX'v= lv, X'XX'v = lX'v => if find eigenv for XX' then X'v is eigenv for X'X | |
# SIMILARLY, if X'Xv = lv, then XX'(Xv) = l(Xv) => the eigenv of XX' must be PCs that we want | |
# OF COURSE, the final result of the matrix with Xv must be orthonormal up to some constant |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
load("D:/2011-02-09 MEC GWAS DATA/2011-03-23 PCA/pcs-2-without-d.RData") | |
eigenvalues <- pcs[[2]] | |
pcs <- data.frame(pcs[[1]]) | |
pcs <- cbind(query("select * from mec_genotyped"),pcs) | |
ht(pcs) | |
dbWriteTable(con, "pcs", pcs, overwrite=T) | |
d<-read.csv("D:/2011-02-09 MEC GWAS DATA/PHENOTYPE DATA/2011-03-23 initial data from christian.csv",T) | |
dbWriteTable(con, "mec_pheno", d, overwrite=T) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Sample overlap | |
-- Samples where in_gwas=1. n=2503 | |
SELECT count(*) FROM igf1.sample_aapc_gwas where in_gwas=1; | |
-- Samples where in_gwas=1 and that are actually in the GWAS data. n=2130. | |
SELECT count(*) FROM igf1.sample_aapc_gwas a JOIN mec.sample_genotyped b on a.m_id=b.m_id where in_gwas=1; | |
-- What about the missing 373? | |
SELECT a.* FROM igf1.sample_aapc_gwas a LEFT JOIN mec.sample_genotyped b on a.m_id=b.m_id WHERE in_gwas=1 AND b.m_id IS NULL; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop table pheno4; | |
create table pheno4 (sampleindex int, m_id varchar(25), area varchar(1), q1_age_cohent int, q3prelim_waist int, q3prelim_hip int, age_calc float); | |
load data local infile 'C:/cygwin/home/sturner/mec_gwas/phenotypes/waisthip.csv' | |
into table pheno4 | |
fields terminated by ',' | |
ignore 1 lines | |
(sampleindex, m_id, area, q1_age_cohent, q3prelim_waist, q3prelim_hip, age_calc) | |
; | |
alter table pheno4 add column q3prelim_whr float after q3prelim_hip; | |
update pheno4 set q3prelim_whr=q3prelim_waist/q3prelim_hip; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
d <- query(" #new | |
SELECT | |
if(b.snp='.' OR b.snp IS NULL, CONCAT_WS('_','chr12',a.pos19), b.snp) as Locus_Name, | |
'SNP' as Target_Type, | |
NULL as Sequence, | |
a.chrom AS Chromosome, | |
a.pos19 as Coordinate, | |
'19' as Genome_Build_Version, | |
'HSapiensReferenceSequence' as Source, | |
'19' as Source_version, |