Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created June 21, 2011 18:55
Show Gist options
  • Save stephenturner/1038587 to your computer and use it in GitHub Desktop.
Save stephenturner/1038587 to your computer and use it in GitHub Desktop.
iona-pull-711-snps-priority-3.r
d <- query(" #new
SELECT
if(b.snp='.' OR b.snp IS NULL, CONCAT_WS('_','chr12',a.pos19), b.snp) as Locus_Name,
'SNP' as Target_Type,
NULL as Sequence,
a.chrom AS Chromosome,
a.pos19 as Coordinate,
'19' as Genome_Build_Version,
'HSapiensReferenceSequence' as Source,
'19' as Source_version,
'Forward' as Sequence_Orientation,
'Plus' as Plus_minus,
a.pos19, CONCAT('[',alleles,']') as alleles, priority,
bestmaf as priority_tiebreaker
FROM igf1.union_nomismatch_priority a
LEFT JOIN igf1.1000g_sites b on a.pos19=b.pos
LEFT JOIN igf1.gwas_overlap c on b.snp=c.snp
WHERE ((offtarget=0 and maxmaf>.05) OR (maf1000g>0.05))
;")
dim(d)
dev.off()
hist(d$pos19, 100, col="black")
bins <- 100
par(mfrow=c(2,1));hist(dold$pos19,bins, col=1);hist(d$pos19,bins, col=1)
i <- 1
t1=Sys.time()
for (i in 1:nrow(d)) d$Sequence[i] <- getflank(d$pos19[i], alleles=d$alleles[i], offset=60) #this takes a while!
t2=Sys.time()
t2-t1
sum(is.na(d$Sequence))
#write.table(d,"igf1-snps-with-flanking-seq.csv",quote=F,row=F,sep=',')
rs_not_found <- query("select * from igf1.rs_not_found;",strings=F)$snp
rs_not_found <- rs_not_found[rs_not_found %in% d$Locus_Name]
d_seq <- d[grep("^rs",d$Locus_Name, invert=T),1:10]
d_id <- d[grep("^rs",d$Locus_Name, invert=F), 1:10]
lapply(list(d_seq,d_id,d),nrow); sum(rs_not_found %in% d$Locus_Name)
d_seq <- rbind(d_seq, d_id[d_id$Locus_Name %in% rs_not_found, ])
d_seq$Locus_Name <- sub("^rs","snp-rs",d_seq$Locus_Name)
d_seq <- d_seq[order(d_seq$Coordinate), ]
d_id <- d_id[d_id$Locus_Name %nin% rs_not_found, ][1]
lapply(list(d_seq,d_id,d),nrow); sum(rs_not_found %in% d$Locus_Name)
write.table(d_seq, name("sequence priority 1-3, 151 snps.csv"), row=F, quote=F, sep=',')
write.table(d_id, name("identity priority 1-3, 275 snps.csv"), row=F, quote=F, sep=',')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment