Created
April 15, 2011 22:33
-
-
Save stephenturner/922588 to your computer and use it in GitHub Desktop.
2011-04-15 SNP Overlap.sql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#--------------- | |
# Sample overlap | |
#--------------- | |
# Samples where in_gwas=1. n=2503 | |
SELECT count(*) FROM igf1.sample_aapc_gwas where in_gwas=1; | |
# Samples where in_gwas=1 and that are actually in the GWAS data. n=2130. | |
SELECT count(*) FROM igf1.sample_aapc_gwas a JOIN mec.sample_genotyped b on a.m_id=b.m_id where in_gwas=1; | |
# What about the missing 373? | |
SELECT a.* FROM igf1.sample_aapc_gwas a LEFT JOIN mec.sample_genotyped b on a.m_id=b.m_id WHERE in_gwas=1 AND b.m_id IS NULL; | |
#-------------------- | |
# Load SNP Stats Data | |
#-------------------- | |
drop table if exists snp_stats_all; | |
create table snp_stats_all (col int, snp varchar(25), nmiss int, ntot int, callrate float, maf float, allgenotyped int, propimputed float); | |
load data local infile | |
'C:/cygwin/home/sturner/MEC_GWAS/numbered_snp_stats_all.csv' | |
into table snp_stats_all | |
fields terminated by ',' | |
ignore 1 lines | |
(col, snp, nmiss, ntot, callrate, maf, allgenotyped, propimputed) | |
; | |
select * from snp_stats_all limit 100; | |
drop table if exists snp_stats_aapc; | |
create table snp_stats_aapc (col int, snp varchar(25), nmiss int, ntot int, callrate float, maf float, allgenotyped int, propimputed float); | |
load data local infile | |
'C:/cygwin/home/sturner/MEC_GWAS/numbered_snp_stats_aapc.csv' | |
into table snp_stats_aapc | |
fields terminated by ',' | |
ignore 1 lines | |
(col, snp, nmiss, ntot, callrate, maf, allgenotyped, propimputed) | |
; | |
select * from snp_stats_aapc limit 100; | |
#------------ | |
# SNP Overlap | |
#------------ | |
CREATE TABLE igf1.gwas_overlap | |
SELECT | |
a.chrom, a.pos19, b.snp, | |
c.callrate as callrate_gwas_aapc, | |
c.maf as maf_gwas_aapc, | |
c.allgenotyped as allgenotyped_gwas_aapc, | |
c.propimputed as propimputed_gwas_aapc, | |
d.callrate as callrate_gwas_all, | |
d.maf as maf_gwas_all , | |
d.allgenotyped as allgenotyped_gwas_all , | |
d.propimputed as propimputed_gwas_all | |
FROM igf1.union_nomismatch a | |
JOIN igf1.1000g_sites b on a.pos19=b.pos # join to 1000g table to get only SNPs with RS numbers | |
JOIN mec.snp_stats_aapc c on b.snp=c.snp # join to the AAPC snp stats table | |
JOIN snp_stats_all d on c.snp=d.snp; # join to the ALL snp stats table | |
SELECT | |
IF(b.snp='.' OR b.snp IS NULL, CONCAT_WS('-','chr12',a.pos19), CONCAT_WS('-','snp',b.snp)) as SNP_Name, | |
'SNP' as target_type, a.chrom, a.pos19, CONCAT('[',alleles,']') as alleles, priority, | |
bestmaf as priority_tiebreaker, meanmaf, maf1000g, | |
callrate_gwas_aapc, maf_gwas_aapc, allgenotyped_gwas_aapc, propimputed_gwas_aapc, | |
'hg19/b37' AS Genome_Build_Version, | |
'HSapiensReferenceSequence' as Source, | |
'hg19' as Source_version, | |
'Forward' as Sequence_Orientation, | |
'Plus' as Plus_Minus | |
FROM igf1.union_nomismatch_priority a | |
LEFT JOIN igf1.1000g_sites b on a.pos19=b.pos | |
LEFT JOIN igf1.gwas_overlap c on b.snp=c.snp | |
WHERE allgenotyped_gwas_aapc=1 and callrate_gwas_aapc>.98; | |
# Overlap of SNPs in GWAS | |
SELECT | |
chrom, pos19, snp, | |
callrate_gwas_aapc, | |
maf_gwas_aapc, | |
allgenotyped_gwas_aapc, | |
propimputed_gwas_aapc | |
FROM gwas_overlap | |
WHERE callrate_gwas_aapc > 0; | |
# Overlap of genotyped SNPs | |
SELECT | |
chrom, pos19, snp, | |
callrate_gwas_aapc, | |
maf_gwas_aapc, | |
allgenotyped_gwas_aapc, | |
propimputed_gwas_aapc | |
FROM gwas_overlap | |
WHERE callrate_gwas_aapc > 0 and allgenotyped_gwas_aapc=1; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment