Last active
December 22, 2015 06:19
-
-
Save kgturner/6430062 to your computer and use it in GitHub Desktop.
Find two minima around a peak, given data structured like an output kmer counting histogram table from jellyfish. Requires 'zoo' and 'ggplot2' packages. More information at http://alienplantation.wordpress.com/2013/09/04/nuts-and-bolts-finding-minima-around-a-peak-in-r/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#####Minima.mer.func.R##### | |
#Kathryn Turner June 21, 2013 | |
#Given an output kmer counting histogram table from jellyfish, | |
#find two minima around a peak | |
#input table from jellyfish | |
dat <- read.table("data.from.jellyfish.hist", head=T) | |
head(dat) | |
frequency multiplicity | |
1 513363 1 | |
2 2024 10 | |
3 25 100 | |
4 21 101 | |
5 18 102 | |
6 22 103 | |
#run function, requires instalation of zoo and ggplot2 packages | |
#specify data as dataframe, window along which local minima is determined, | |
#cutoff1 (low) and cutoff2 (high) to subset data around specific peak | |
#breaks determines number of bins in subset | |
#default prints multiplicity and frequency of first minima | |
#extra plots graph and print starting AND stopping minima around peak (hopefully) | |
Minima.mer <- function(kmerdat, window=4, cutoff1=3, cutoff2=100, breaks=100, extra=FALSE){ | |
library(zoo) | |
#limit scope of analysis around peak of interest w/cutoffs | |
sub <- kmerdat[kmerdat$multiplicity<cutoff2 & kmerdat$multiplicity>cutoff1,] | |
sub <- sub[with(sub, order(multiplicity)), ] | |
freq <- sub$frequency | |
mul <- sub$multiplicity | |
binned <- data.frame(mul=mul, bin = cut(mul, breaks=breaks)) | |
binned2 <- aggregate(freq~bin, data=binned, sum) | |
freqz <- as.zoo(binned2$freq) | |
rfreqz <- rollapply(freqz, window, function(x) which.min(x)==2) | |
minima <- as.vector(index(rfreqz)[coredata(rfreqz)]) | |
binned3 <- binned2 | |
binned4<-data.frame(do.call('rbind', strsplit(as.character(binned3$bin), ',', fixed=TRUE))) | |
binned3 <- cbind(binned3, binned4) | |
binned3$X1 <- as.numeric(gsub( "\\(", "", as.character(binned3$X1))) | |
binned3$X2 <- as.numeric(gsub( "\\]", "", as.character(binned3$X2))) | |
print(kmerdat[kmerdat$multiplicity>=binned3[minima[1],]$X1 & kmerdat$multiplicity<=binned3[minima[1],]$X2,]) | |
if (extra==TRUE){ | |
print("Start minima") | |
print(binned2[minima[1],]) | |
print("Stop minima") | |
print(binned2[minima[2],]) | |
library(ggplot2) | |
p <- qplot(x=binned2$bin, y=binned2$freq, xlab="binned multiplicity", ylab="binned frequency") | |
return(p+ annotate("point", x=binned2$bin[minima[1]], y=binned2$freq[minima[1]], size = 5, color = "green")+ | |
annotate("point",x=binned2$bin[minima[2]], y=binned2$freq[minima[2]], size = 5, color = "red")) | |
} | |
} | |
#run function on data | |
Minima.mer(dat) | |
Minima.mer(dat,window=3) | |
Minima.mer(dat, extra=TRUE) | |
#play with window size to find appropriately located minima | |
#adjust cutoff1 and cutoff2 to subset data |
Fixed, thanks for the comment!
Not working with latest jellyfish 2.20 .
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There's a typo in
I think it should be