Skip to content

Instantly share code, notes, and snippets.

@kbroman
Last active August 29, 2015 13:58
Show Gist options
  • Save kbroman/10277099 to your computer and use it in GitHub Desktop.
Save kbroman/10277099 to your computer and use it in GitHub Desktop.
cleanGeno modified to work with riself, risib, dh, and haploids, rather than just backcross
######################################################################
#
# cleanGeno: omit genotypes that are possibly in error, as indicated
# by apparent double-crossovers separated by a distance of
# no more than maxdist and having no more than maxmark
# interior typed markers
#
######################################################################
cleanGeno <-
function(cross, chr, maxdist=2.5, maxmark=2, verbose=TRUE)
{
if(!(class(cross)[1] %in% c("bc", "riself", "risib", "dh", "haploid")))
stop("This function currently only works for crosses with two genotypes")
if(!missing(chr)) cleaned <- subset(cross, chr=chr)
else cleaned <- cross
thechr <- names(cleaned$geno)
totdrop <- 0
maxmaxdist <- max(maxdist)
for(i in thechr) {
xoloc <- locateXO(cleaned, chr=i, full.info=TRUE)
nxo <- sapply(xoloc, function(a) if(is.matrix(a)) return(nrow(a)) else return(0))
g <- pull.geno(cleaned, chr=i)
ndrop <- 0
for(j in which(nxo > 1)) {
maxd <- xoloc[[j]][-1,"right"] - xoloc[[j]][-nrow(xoloc[[j]]),"left"]
wh <- maxd <= maxmaxdist
if(any(wh)) {
for(k in which(wh)) {
nt <- sum(!is.na(g[j,(xoloc[[j]][k,"ileft"]+1):(xoloc[[j]][k+1,"iright"]-1)]))
if(nt > 0 && any(nt <= maxmark & maxd[k] < maxdist)) {
cleaned$geno[[i]]$data[j,(xoloc[[j]][k,"ileft"]+1):(xoloc[[j]][k+1,"iright"]-1)] <- NA
ndrop <- ndrop + nt
totdrop <- totdrop + nt
}
}
}
}
if(verbose && ndrop > 0) {
totgen <- sum(ntyped(subset(cross, chr=i)))
cat(" ---Dropping ", ndrop, " genotypes (out of ", totgen, ") on chr ", i, "\n", sep="")
}
}
if(verbose && nchr(cleaned)>1 && totdrop > 0) {
totgen <- sum(ntyped(subset(cross, chr=thechr)))
cat(" ---Dropped ", totdrop, " genotypes (out of ", totgen, ") in total\n", sep="")
}
for(i in names(cleaned$geno))
cross$geno[[i]] <- cleaned$geno[[i]]
cross
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment