Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save stephenturner/916791 to your computer and use it in GitHub Desktop.
Save stephenturner/916791 to your computer and use it in GitHub Desktop.
2011-04-12 get flanking seq for igf1.r
## Load the BSgenome package and download the hg19 reference sequence
## http://www.bioconductor.org/packages/release/bioc/html/BSgenome.html
# source("http://www.bioconductor.org/biocLite.R")
# biocLite("BSgenome")
# biocLite("BSgenome.Hsapiens.UCSC.hg19")
library('BSgenome.Hsapiens.UCSC.hg19')
installed.genomes()
getflank <- function(position, alleles="[N/N]", chr="chr12", offset=10) {
leftflank <- getSeq(Hsapiens,chr,position-offset,position-1)
rightflank <- getSeq(Hsapiens,chr,position+1,position+offset)
paste(leftflank,alleles,rightflank,sep="")
}
d <- query("
select
if(b.snp='.' OR b.snp IS NULL, CONCAT_WS('_','SNP','chr12',pos19), b.snp) as snp,
'SNP' as target_type, a.chrom, pos19, CONCAT('[',alleles,']') as alleles, priority,
bestmaf as priority_tiebreaker,
callrate as gwas_callrate, maf as gwas_maf,
'hg19/b37' AS Genome_Build_Version,
'HSapiensReferenceSequence' as Source,
'hg19' as Source_version,
'Forward' as Sequence_Orientation,
'Plus' as Plus_Minus
FROM igf1.union_nomismatch_priority a
LEFT JOIN igf1.1000g_sites b on a.pos19=b.pos
LEFT JOIN igf1.gwas_overlap c on b.snp=c.snp;
")
# library(sqldf)
# d <- sqldf("select snp, chrom, pos19, alleles from d where snp like 'rs%' limit 10;")
# d
i <- 1
d$sequence=NA
t1=Sys.time()
for (i in 1:nrow(d)) d$sequence[i] <- getflank(d$pos19[i], alleles=d$alleles[i], offset=60) #this takes a while!
t2=Sys.time()
t2-t1
sum(is.na(d$sequence))
write.table(d, "2011-04-12 SNPs with Flanking Sequence.csv", row=F, quote=F, sep=',')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment