Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created April 15, 2011 22:33
Show Gist options
  • Save stephenturner/922588 to your computer and use it in GitHub Desktop.
Save stephenturner/922588 to your computer and use it in GitHub Desktop.
2011-04-15 SNP Overlap.sql
#---------------
# Sample overlap
#---------------
# Samples where in_gwas=1. n=2503
SELECT count(*) FROM igf1.sample_aapc_gwas where in_gwas=1;
# Samples where in_gwas=1 and that are actually in the GWAS data. n=2130.
SELECT count(*) FROM igf1.sample_aapc_gwas a JOIN mec.sample_genotyped b on a.m_id=b.m_id where in_gwas=1;
# What about the missing 373?
SELECT a.* FROM igf1.sample_aapc_gwas a LEFT JOIN mec.sample_genotyped b on a.m_id=b.m_id WHERE in_gwas=1 AND b.m_id IS NULL;
#--------------------
# Load SNP Stats Data
#--------------------
drop table if exists snp_stats_all;
create table snp_stats_all (col int, snp varchar(25), nmiss int, ntot int, callrate float, maf float, allgenotyped int, propimputed float);
load data local infile
'C:/cygwin/home/sturner/MEC_GWAS/numbered_snp_stats_all.csv'
into table snp_stats_all
fields terminated by ','
ignore 1 lines
(col, snp, nmiss, ntot, callrate, maf, allgenotyped, propimputed)
;
select * from snp_stats_all limit 100;
drop table if exists snp_stats_aapc;
create table snp_stats_aapc (col int, snp varchar(25), nmiss int, ntot int, callrate float, maf float, allgenotyped int, propimputed float);
load data local infile
'C:/cygwin/home/sturner/MEC_GWAS/numbered_snp_stats_aapc.csv'
into table snp_stats_aapc
fields terminated by ','
ignore 1 lines
(col, snp, nmiss, ntot, callrate, maf, allgenotyped, propimputed)
;
select * from snp_stats_aapc limit 100;
#------------
# SNP Overlap
#------------
CREATE TABLE igf1.gwas_overlap
SELECT
a.chrom, a.pos19, b.snp,
c.callrate as callrate_gwas_aapc,
c.maf as maf_gwas_aapc,
c.allgenotyped as allgenotyped_gwas_aapc,
c.propimputed as propimputed_gwas_aapc,
d.callrate as callrate_gwas_all,
d.maf as maf_gwas_all ,
d.allgenotyped as allgenotyped_gwas_all ,
d.propimputed as propimputed_gwas_all
FROM igf1.union_nomismatch a
JOIN igf1.1000g_sites b on a.pos19=b.pos # join to 1000g table to get only SNPs with RS numbers
JOIN mec.snp_stats_aapc c on b.snp=c.snp # join to the AAPC snp stats table
JOIN snp_stats_all d on c.snp=d.snp; # join to the ALL snp stats table
SELECT
IF(b.snp='.' OR b.snp IS NULL, CONCAT_WS('-','chr12',a.pos19), CONCAT_WS('-','snp',b.snp)) as SNP_Name,
'SNP' as target_type, a.chrom, a.pos19, CONCAT('[',alleles,']') as alleles, priority,
bestmaf as priority_tiebreaker, meanmaf, maf1000g,
callrate_gwas_aapc, maf_gwas_aapc, allgenotyped_gwas_aapc, propimputed_gwas_aapc,
'hg19/b37' AS Genome_Build_Version,
'HSapiensReferenceSequence' as Source,
'hg19' as Source_version,
'Forward' as Sequence_Orientation,
'Plus' as Plus_Minus
FROM igf1.union_nomismatch_priority a
LEFT JOIN igf1.1000g_sites b on a.pos19=b.pos
LEFT JOIN igf1.gwas_overlap c on b.snp=c.snp
WHERE allgenotyped_gwas_aapc=1 and callrate_gwas_aapc>.98;
# Overlap of SNPs in GWAS
SELECT
chrom, pos19, snp,
callrate_gwas_aapc,
maf_gwas_aapc,
allgenotyped_gwas_aapc,
propimputed_gwas_aapc
FROM gwas_overlap
WHERE callrate_gwas_aapc > 0;
# Overlap of genotyped SNPs
SELECT
chrom, pos19, snp,
callrate_gwas_aapc,
maf_gwas_aapc,
allgenotyped_gwas_aapc,
propimputed_gwas_aapc
FROM gwas_overlap
WHERE callrate_gwas_aapc > 0 and allgenotyped_gwas_aapc=1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment