Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created March 24, 2011 03:14
Show Gist options
  • Save stephenturner/884488 to your computer and use it in GitHub Desktop.
Save stephenturner/884488 to your computer and use it in GitHub Desktop.
pickSNPs.r
#-------------------------------------------------------------------------------------
# Select a set of SNPs > dist bp (default 100kb) apart
# Map is a matrix with colnames "chrom" and "position".
# Matrix MUST BE sorted by chrom,position
# Can have more columns but the colnames "chrom" and "position" can't be changed
# The function returns a vector of row indices corresponding
# to the rows you should pick for your subset.
#-------------------------------------------------------------------------------------
pickSNPs<-function(map, dist = 100000) {
t=as.data.frame(table(map$chrom))
vec = map$position
subs = c(1,rep(NA,nrow(map)-1)); # length(subs) = nrow, but the 1st element is 1 => always select the 1st snp
for (k in 1:nrow(t)) { # t: count of SNPs per chr
if (k==1) i=1 else i=sum(t[1:(k-1),2])+1; # the 1st SNP on each ch
subs[i] = i
stop=sum(t[1:k,2])
while (i<stop) {
for (j in (i+1):stop) {
if ((vec[j]-vec[i]) > dist) {
#cat(i, vec[i], j, vec[j],vec[j]-vec[i], x[j],'\n');
subs[j]= j;
i=j;
next; # jump out of loop
} else if (j==stop) i=stop
}
}
}
subs[!is.na(subs)] # row number of selected SNPs
}
#-------------------------------------------------------------------------------------
map <- read.table("mapfile.txt",header=TRUE)
# mapfile might look something like this:
# snp chrom position major minor
# 1 rs9701055 1 555296 C T
# 2 rs9699599 1 558185 A G
# 3 rs12565286 1 711153 G C
# 4 rs28659788 1 713170 C G
# 5 rs11804171 1 713682 T A
# 6 rs2977656 1 719811 C T
# 7 rs12082473 1 730720 G A
# 8 rs12138618 1 740098 G A
# 9 rs3094315 1 742429 G A
# 1 rs3131972 1 742584 A G
# Select SNPs 500kb apart
keepRows <- pickSNPs(map,dist=500000)
mapsubset <- map[keepRows, ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment